/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.text;

import java.util.Arrays;

import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.text.matcher.StringMatcherFactory;

/**
 * A matcher class that can be queried to determine if a character array
 * portion matches.
 * <p>
 * This class comes complete with various factory methods.
 * If these do not suffice, you can subclass and implement your own matcher.
 *
 * @since 1.0
 * @deprecated Deprecated as of 1.3, use {@link StringMatcherFactory} instead. This class will be removed in 2.0.
 */
@Deprecated
public abstract class StrMatcher {

    /**
     * Matches the comma character.
     */
    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');

    /**
     * Matches the tab character.
     */
    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');

    /**
     * Matches the space character.
     */
    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and form feed.
     */
    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());

    /**
     * Matches the String trim() whitespace characters.
     */
    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();

    /**
     * Matches the single quote character.
     */
    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');

    /**
     * Matches the double quote character.
     */
    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');

    /**
     * Matches the single or double quote character.
     */
    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());

    /**
     * Matches no characters.
     */
    private static final StrMatcher NONE_MATCHER = new NoMatcher();

    // -----------------------------------------------------------------------

    /**
     * Returns a matcher which matches the comma character.
     *
     * @return a matcher for a comma
     */
    public static StrMatcher commaMatcher() {
        return COMMA_MATCHER;
    }

    /**
     * Returns a matcher which matches the tab character.
     *
     * @return a matcher for a tab
     */
    public static StrMatcher tabMatcher() {
        return TAB_MATCHER;
    }

    /**
     * Returns a matcher which matches the space character.
     *
     * @return a matcher for a space
     */
    public static StrMatcher spaceMatcher() {
        return SPACE_MATCHER;
    }

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and form feed.
     *
     * @return The split matcher
     */
    public static StrMatcher splitMatcher() {
        return SPLIT_MATCHER;
    }

    /**
     * Matches the String trim() whitespace characters.
     *
     * @return The trim matcher
     */
    public static StrMatcher trimMatcher() {
        return TRIM_MATCHER;
    }

    /**
     * Returns a matcher which matches the single quote character.
     *
     * @return a matcher for a single quote
     */
    public static StrMatcher singleQuoteMatcher() {
        return SINGLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the double quote character.
     *
     * @return a matcher for a double quote
     */
    public static StrMatcher doubleQuoteMatcher() {
        return DOUBLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single or double quote character.
     *
     * @return a matcher for a single or double quote
     */
    public static StrMatcher quoteMatcher() {
        return QUOTE_MATCHER;
    }

    /**
     * Matches no characters.
     *
     * @return a matcher that matches nothing
     */
    public static StrMatcher noneMatcher() {
        return NONE_MATCHER;
    }

    /**
     * Creates a matcher from a character.
     *
     * @param ch  the character to match
     * @return a new Matcher for the given char
     */
    public static StrMatcher charMatcher(final char ch) {
        return new CharMatcher(ch);
    }

    /**
     * Creates a matcher from a set of characters.
     * <p>
     * A single-element array yields a single-character matcher rather than
     * a set matcher.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a new matcher for the given char[]
     */
    public static StrMatcher charSetMatcher(final char... chars) {
        if (ArrayUtils.isEmpty(chars)) {
            return NONE_MATCHER;
        }
        if (chars.length == 1) {
            return new CharMatcher(chars[0]);
        }
        return new CharSetMatcher(chars);
    }

    /**
     * Creates a matcher from a string representing a set of characters.
     * <p>
     * A one-character string yields a single-character matcher rather than
     * a set matcher.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a new Matcher for the given characters
     */
    public static StrMatcher charSetMatcher(final String chars) {
        if (chars == null || chars.length() == 0) {
            return NONE_MATCHER;
        }
        if (chars.length() == 1) {
            return new CharMatcher(chars.charAt(0));
        }
        return new CharSetMatcher(chars.toCharArray());
    }

    /**
     * Creates a matcher from a string.
     *
     * @param str  the string to match, null or empty matches nothing
     * @return a new Matcher for the given String
     */
    public static StrMatcher stringMatcher(final String str) {
        if (str == null || str.length() == 0) {
            return NONE_MATCHER;
        }
        return new StringMatcher(str);
    }

    //-----------------------------------------------------------------------
    /**
     * Constructor.
     */
    protected StrMatcher() {
        super();
    }

    /**
     * Returns the number of matching characters, or zero if there is no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * <p>
     * The character array may be larger than the active area to be matched.
     * Only values in the buffer between the specified indices may be accessed.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those
     * after, so long as no checks exceed the bounds specified.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @param bufferStart  the first active index in the buffer, valid for buffer
     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
     * @return The number of matching characters, or zero if there is no match
     */
    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

    /**
     * Returns the number of matching characters, or zero if there is no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those after.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     * <p>
     * This convenience overload treats the whole array as the active area,
     * delegating with {@code bufferStart = 0} and {@code bufferEnd = buffer.length}.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @return The number of matching characters, or zero if there is no match
     */
    public int isMatch(final char[] buffer, final int pos) {
        return isMatch(buffer, pos, 0, buffer.length);
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a set of characters for matching purposes.
     */
    static final class CharSetMatcher extends StrMatcher {
        /** The set of characters to match, kept sorted for binary search. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a character array.
         * <p>
         * The array is copied, so the caller's array is neither retained nor reordered.
         *
         * @param chars  the characters to match, must not be null
         */
        CharSetMatcher(final char[] chars) {
            super();
            this.chars = chars.clone();
            // isMatch relies on Arrays.binarySearch, which requires sorted input.
            Arrays.sort(this.chars);
        }

        /**
         * Returns {@code 1} if there is a match, or {@code 0} if there is no match.
         * <p>
         * Only the single character at {@code pos} is examined.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a character for matching purposes.
     */
    static final class CharMatcher extends StrMatcher {
        /** The character to match. */
        private final char ch;

        /**
         * Constructor that creates a matcher that matches a single character.
         *
         * @param ch  the character to match
         */
        CharMatcher(final char ch) {
            super();
            this.ch = ch;
        }

        /**
         * Returns {@code 1} if there is a match, or {@code 0} if there is no match.
         * <p>
         * Only the single character at {@code pos} is examined.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return ch == buffer[pos] ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define an exact sequence of characters for matching purposes.
     */
    static final class StringMatcher extends StrMatcher {
        /** The string to match, as a character array. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a String.
         *
         * @param str  the string to match, must not be null
         */
        StringMatcher(final String str) {
            super();
            chars = str.toCharArray();
        }

        /**
         * Returns the number of matching characters, or zero if there is no match.
         * <p>
         * A match requires the whole string to appear in {@code buffer} starting at
         * {@code pos} and ending before {@code bufferEnd}.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, int pos, final int bufferStart, final int bufferEnd) {
            final int len = chars.length;
            // Not enough active characters left to hold the whole string.
            if (pos + len > bufferEnd) {
                return 0;
            }
            for (int i = 0; i < len; i++, pos++) {
                if (chars[i] != buffer[pos]) {
                    return 0;
                }
            }
            return len;
        }

        /**
         * Returns the default string form followed by the characters being matched.
         *
         * @return a string representation of this matcher
         */
        @Override
        public String toString() {
            return super.toString() + ' ' + Arrays.toString(chars);
        }

    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match no characters.
     */
    static final class NoMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code NoMatcher}.
         */
        NoMatcher() {
            super();
        }

        /**
         * Always returns {@code 0}.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match whitespace as per trim().
     */
    static final class TrimMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code TrimMatcher}.
         */
        TrimMatcher() {
            super();
        }

        /**
         * Returns {@code 1} if the character at {@code pos} has a value of 32
         * (the space character) or lower, or {@code 0} otherwise.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return buffer[pos] <= 32 ? 1 : 0;
        }
    }

}