/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.text;

import java.util.Arrays;

import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.text.matcher.StringMatcherFactory;

/**
 * A matcher class that can be queried to determine if a character array
 * portion matches.
 * <p>
 * This class comes complete with various factory methods.
 * If these do not suffice, you can subclass and implement your own matcher.
 * </p>
 *
 * @since 1.0
 * @deprecated Deprecated as of 1.3, use {@link StringMatcherFactory} instead. This class will be removed in 2.0.
 */
@Deprecated
public abstract class StrMatcher {

    /**
     * Class used to define a character for matching purposes.
     */
    private static final class CharMatcher extends StrMatcher {

        /** The character to match. */
        private final char ch;

        /**
         * Constructor that creates a matcher that matches a single character.
         *
         * @param ch  the character to match
         */
        private CharMatcher(final char ch) {
            this.ch = ch;
        }

        /**
         * Returns {@code 1} if there is a match, or {@code 0} if there is no match.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return ch == buffer[pos] ? 1 : 0;
        }
    }

    /**
     * Class used to define a set of characters for matching purposes.
     */
    private static final class CharSetMatcher extends StrMatcher {

        /** The set of characters to match, kept sorted so it can be binary-searched. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a character array.
         * <p>
         * The array is cloned before sorting, so the caller's array is never modified.
         * </p>
         *
         * @param chars  the characters to match, must not be null
         */
        private CharSetMatcher(final char[] chars) {
            this.chars = chars.clone();
            Arrays.sort(this.chars);
        }

        /**
         * Returns {@code 1} if there is a match, or {@code 0} if there is no match.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
        }
    }

    /**
     * Class used to match no characters.
     */
    private static final class NoMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code NoMatcher}.
         */
        private NoMatcher() {
        }

        /**
         * Always returns {@code 0}.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return 0;
        }
    }

    /**
     * Class used to define a string (a fixed sequence of characters) for matching purposes.
     */
    private static final class StringMatcher extends StrMatcher {

        /** The string to match, as a character array. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a String.
         *
         * @param str  the string to match, must not be null
         */
        private StringMatcher(final String str) {
            chars = str.toCharArray();
        }

        /**
         * Returns the number of matching characters, or zero if there is no match.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            final int len = chars.length;
            // Written as a subtraction so the bounds check cannot overflow int,
            // which "pos + len > bufferEnd" could for very large values.
            if (len > bufferEnd - pos) {
                return 0;
            }
            for (int i = 0; i < len; i++) {
                if (chars[i] != buffer[pos + i]) {
                    return 0;
                }
            }
            return len;
        }

        @Override
        public String toString() {
            return super.toString() + ' ' + Arrays.toString(chars);
        }

    }

    /**
     * Class used to match whitespace as per trim().
     */
    private static final class TrimMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code TrimMatcher}.
         */
        private TrimMatcher() {
        }

        /**
         * Returns {@code 1} if the character at {@code pos} is less than or equal to
         * the space character, or {@code 0} otherwise.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            // String.trim() strips every char up to and including ' ' (code 32).
            return buffer[pos] <= ' ' ? 1 : 0;
        }
    }

    /**
     * Matches the comma character.
     */
    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');

    /**
     * Matches the tab character.
     */
    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');

    /**
     * Matches the space character.
     */
    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return, form feed.
     */
    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());

    /**
     * Matches the String trim() whitespace characters.
     */
    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();

    /**
     * Matches the single quote character.
     */
    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');

    /**
     * Matches the double quote character.
     */
    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');

    /**
     * Matches the single or double quote character.
     */
    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());

    /**
     * Matches no characters.
     */
    private static final StrMatcher NONE_MATCHER = new NoMatcher();

    /**
     * Creates a matcher from a character.
     *
     * @param ch  the character to match
     * @return a new Matcher for the given char
     */
    public static StrMatcher charMatcher(final char ch) {
        return new CharMatcher(ch);
    }

    /**
     * Creates a matcher from a set of characters.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a new matcher for the given char[]
     */
    public static StrMatcher charSetMatcher(final char... chars) {
        if (ArrayUtils.isEmpty(chars)) {
            return NONE_MATCHER;
        }
        if (chars.length == 1) {
            return new CharMatcher(chars[0]);
        }
        return new CharSetMatcher(chars);
    }

    /**
     * Creates a matcher from a string representing a set of characters.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a new Matcher for the given characters
     */
    public static StrMatcher charSetMatcher(final String chars) {
        if (chars == null || chars.isEmpty()) {
            return NONE_MATCHER;
        }
        if (chars.length() == 1) {
            return new CharMatcher(chars.charAt(0));
        }
        return new CharSetMatcher(chars.toCharArray());
    }

    /**
     * Returns a matcher which matches the comma character.
     *
     * @return a matcher for a comma
     */
    public static StrMatcher commaMatcher() {
        return COMMA_MATCHER;
    }

    /**
     * Returns a matcher which matches the double quote character.
     *
     * @return a matcher for a double quote
     */
    public static StrMatcher doubleQuoteMatcher() {
        return DOUBLE_QUOTE_MATCHER;
    }

    /**
     * Matches no characters.
     *
     * @return a matcher that matches nothing
     */
    public static StrMatcher noneMatcher() {
        return NONE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single or double quote character.
     *
     * @return a matcher for a single or double quote
     */
    public static StrMatcher quoteMatcher() {
        return QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single quote character.
     *
     * @return a matcher for a single quote
     */
    public static StrMatcher singleQuoteMatcher() {
        return SINGLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the space character.
     *
     * @return a matcher for a space
     */
    public static StrMatcher spaceMatcher() {
        return SPACE_MATCHER;
    }

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and form feed.
     *
     * @return The split matcher
     */
    public static StrMatcher splitMatcher() {
        return SPLIT_MATCHER;
    }

    /**
     * Creates a matcher from a string.
     *
     * @param str  the string to match, null or empty matches nothing
     * @return a new Matcher for the given String
     */
    public static StrMatcher stringMatcher(final String str) {
        if (str == null || str.isEmpty()) {
            return NONE_MATCHER;
        }
        return new StringMatcher(str);
    }

    /**
     * Returns a matcher which matches the tab character.
     *
     * @return a matcher for a tab
     */
    public static StrMatcher tabMatcher() {
        return TAB_MATCHER;
    }

    /**
     * Matches the String trim() whitespace characters.
     *
     * @return The trim matcher
     */
    public static StrMatcher trimMatcher() {
        return TRIM_MATCHER;
    }

    /**
     * Constructs a new instance.
     */
    protected StrMatcher() {
    }

    /**
     * Returns the number of matching characters, or zero if there is no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * </p>
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those after.
     * </p>
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     * </p>
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @return The number of matching characters, or zero if there is no match
     */
    public int isMatch(final char[] buffer, final int pos) {
        return isMatch(buffer, pos, 0, buffer.length);
    }

    /**
     * Returns the number of matching characters, or zero if there is no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * </p>
     * <p>
     * The character array may be larger than the active area to be matched.
     * Only values in the buffer between the specified indices may be accessed.
     * </p>
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those
     * after, so long as no checks exceed the bounds specified.
     * </p>
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     * </p>
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @param bufferStart  the first active index in the buffer, valid for buffer
     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
     * @return The number of matching characters, or zero if there is no match
     */
    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

}