001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.lang3.text; 018 019import java.util.Arrays; 020 021import org.apache.commons.lang3.StringUtils; 022 023/** 024 * A matcher class that can be queried to determine if a character array 025 * portion matches. 026 * <p> 027 * This class comes complete with various factory methods. 028 * If these do not suffice, you can subclass and implement your own matcher. 029 * 030 * @since 2.2 031 * @deprecated as of 3.6, use commons-text 032 * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/StringMatcherFactory.html"> 033 * StringMatcherFactory</a> instead 034 */ 035@Deprecated 036public abstract class StrMatcher { 037 038 /** 039 * Matches the comma character. 040 */ 041 private static final StrMatcher COMMA_MATCHER = new CharMatcher(','); 042 /** 043 * Matches the tab character. 044 */ 045 private static final StrMatcher TAB_MATCHER = new CharMatcher('\t'); 046 /** 047 * Matches the space character. 048 */ 049 private static final StrMatcher SPACE_MATCHER = new CharMatcher(' '); 050 /** 051 * Matches the same characters as StringTokenizer, 052 * namely space, tab, newline, formfeed. 053 */ 054 private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray()); 055 /** 056 * Matches the String trim() whitespace characters. 057 */ 058 private static final StrMatcher TRIM_MATCHER = new TrimMatcher(); 059 /** 060 * Matches the double quote character. 061 */ 062 private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\''); 063 /** 064 * Matches the double quote character. 065 */ 066 private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"'); 067 /** 068 * Matches the single or double quote character. 069 */ 070 private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray()); 071 /** 072 * Matches no characters. 073 */ 074 private static final StrMatcher NONE_MATCHER = new NoMatcher(); 075 076 // ----------------------------------------------------------------------- 077 078 /** 079 * Returns a matcher which matches the comma character. 080 * 081 * @return a matcher for a comma 082 */ 083 public static StrMatcher commaMatcher() { 084 return COMMA_MATCHER; 085 } 086 087 /** 088 * Returns a matcher which matches the tab character. 089 * 090 * @return a matcher for a tab 091 */ 092 public static StrMatcher tabMatcher() { 093 return TAB_MATCHER; 094 } 095 096 /** 097 * Returns a matcher which matches the space character. 098 * 099 * @return a matcher for a space 100 */ 101 public static StrMatcher spaceMatcher() { 102 return SPACE_MATCHER; 103 } 104 105 /** 106 * Matches the same characters as StringTokenizer, 107 * namely space, tab, newline and formfeed. 108 * 109 * @return the split matcher 110 */ 111 public static StrMatcher splitMatcher() { 112 return SPLIT_MATCHER; 113 } 114 115 /** 116 * Matches the String trim() whitespace characters. 117 * 118 * @return the trim matcher 119 */ 120 public static StrMatcher trimMatcher() { 121 return TRIM_MATCHER; 122 } 123 124 /** 125 * Returns a matcher which matches the single quote character. 126 * 127 * @return a matcher for a single quote 128 */ 129 public static StrMatcher singleQuoteMatcher() { 130 return SINGLE_QUOTE_MATCHER; 131 } 132 133 /** 134 * Returns a matcher which matches the double quote character. 135 * 136 * @return a matcher for a double quote 137 */ 138 public static StrMatcher doubleQuoteMatcher() { 139 return DOUBLE_QUOTE_MATCHER; 140 } 141 142 /** 143 * Returns a matcher which matches the single or double quote character. 144 * 145 * @return a matcher for a single or double quote 146 */ 147 public static StrMatcher quoteMatcher() { 148 return QUOTE_MATCHER; 149 } 150 151 /** 152 * Matches no characters. 153 * 154 * @return a matcher that matches nothing 155 */ 156 public static StrMatcher noneMatcher() { 157 return NONE_MATCHER; 158 } 159 160 /** 161 * Constructor that creates a matcher from a character. 162 * 163 * @param ch the character to match, must not be null 164 * @return a new Matcher for the given char 165 */ 166 public static StrMatcher charMatcher(final char ch) { 167 return new CharMatcher(ch); 168 } 169 170 /** 171 * Constructor that creates a matcher from a set of characters. 172 * 173 * @param chars the characters to match, null or empty matches nothing 174 * @return a new matcher for the given char[] 175 */ 176 public static StrMatcher charSetMatcher(final char... chars) { 177 if (chars == null || chars.length == 0) { 178 return NONE_MATCHER; 179 } 180 if (chars.length == 1) { 181 return new CharMatcher(chars[0]); 182 } 183 return new CharSetMatcher(chars); 184 } 185 186 /** 187 * Constructor that creates a matcher from a string representing a set of characters. 188 * 189 * @param chars the characters to match, null or empty matches nothing 190 * @return a new Matcher for the given characters 191 */ 192 public static StrMatcher charSetMatcher(final String chars) { 193 if (StringUtils.isEmpty(chars)) { 194 return NONE_MATCHER; 195 } 196 if (chars.length() == 1) { 197 return new CharMatcher(chars.charAt(0)); 198 } 199 return new CharSetMatcher(chars.toCharArray()); 200 } 201 202 /** 203 * Constructor that creates a matcher from a string. 204 * 205 * @param str the string to match, null or empty matches nothing 206 * @return a new Matcher for the given String 207 */ 208 public static StrMatcher stringMatcher(final String str) { 209 if (StringUtils.isEmpty(str)) { 210 return NONE_MATCHER; 211 } 212 return new StringMatcher(str); 213 } 214 215 //----------------------------------------------------------------------- 216 /** 217 * Constructor. 218 */ 219 protected StrMatcher() { 220 super(); 221 } 222 223 /** 224 * Returns the number of matching characters, zero for no match. 225 * <p> 226 * This method is called to check for a match. 227 * The parameter {@code pos} represents the current position to be 228 * checked in the string {@code buffer} (a character array which must 229 * not be changed). 230 * The API guarantees that {@code pos} is a valid index for {@code buffer}. 231 * <p> 232 * The character array may be larger than the active area to be matched. 233 * Only values in the buffer between the specified indices may be accessed. 234 * <p> 235 * The matching code may check one character or many. 236 * It may check characters preceding {@code pos} as well as those 237 * after, so long as no checks exceed the bounds specified. 238 * <p> 239 * It must return zero for no match, or a positive number if a match was found. 240 * The number indicates the number of characters that matched. 241 * 242 * @param buffer the text content to match against, do not change 243 * @param pos the starting position for the match, valid for buffer 244 * @param bufferStart the first active index in the buffer, valid for buffer 245 * @param bufferEnd the end index (exclusive) of the active buffer, valid for buffer 246 * @return the number of matching characters, zero for no match 247 */ 248 public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd); 249 250 /** 251 * Returns the number of matching characters, zero for no match. 252 * <p> 253 * This method is called to check for a match. 254 * The parameter {@code pos} represents the current position to be 255 * checked in the string {@code buffer} (a character array which must 256 * not be changed). 257 * The API guarantees that {@code pos} is a valid index for {@code buffer}. 258 * <p> 259 * The matching code may check one character or many. 260 * It may check characters preceding {@code pos} as well as those after. 261 * <p> 262 * It must return zero for no match, or a positive number if a match was found. 263 * The number indicates the number of characters that matched. 264 * 265 * @param buffer the text content to match against, do not change 266 * @param pos the starting position for the match, valid for buffer 267 * @return the number of matching characters, zero for no match 268 * @since 2.4 269 */ 270 public int isMatch(final char[] buffer, final int pos) { 271 return isMatch(buffer, pos, 0, buffer.length); 272 } 273 274 //----------------------------------------------------------------------- 275 /** 276 * Class used to define a set of characters for matching purposes. 277 */ 278 static final class CharSetMatcher extends StrMatcher { 279 /** The set of characters to match. */ 280 private final char[] chars; 281 282 /** 283 * Constructor that creates a matcher from a character array. 284 * 285 * @param chars the characters to match, must not be null 286 */ 287 CharSetMatcher(final char[] chars) { 288 super(); 289 this.chars = chars.clone(); 290 Arrays.sort(this.chars); 291 } 292 293 /** 294 * Returns whether or not the given character matches. 295 * 296 * @param buffer the text content to match against, do not change 297 * @param pos the starting position for the match, valid for buffer 298 * @param bufferStart the first active index in the buffer, valid for buffer 299 * @param bufferEnd the end index of the active buffer, valid for buffer 300 * @return the number of matching characters, zero for no match 301 */ 302 @Override 303 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) { 304 return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0; 305 } 306 } 307 308 //----------------------------------------------------------------------- 309 /** 310 * Class used to define a character for matching purposes. 311 */ 312 static final class CharMatcher extends StrMatcher { 313 /** The character to match. */ 314 private final char ch; 315 316 /** 317 * Constructor that creates a matcher that matches a single character. 318 * 319 * @param ch the character to match 320 */ 321 CharMatcher(final char ch) { 322 super(); 323 this.ch = ch; 324 } 325 326 /** 327 * Returns whether or not the given character matches. 328 * 329 * @param buffer the text content to match against, do not change 330 * @param pos the starting position for the match, valid for buffer 331 * @param bufferStart the first active index in the buffer, valid for buffer 332 * @param bufferEnd the end index of the active buffer, valid for buffer 333 * @return the number of matching characters, zero for no match 334 */ 335 @Override 336 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) { 337 return ch == buffer[pos] ? 1 : 0; 338 } 339 } 340 341 //----------------------------------------------------------------------- 342 /** 343 * Class used to define a set of characters for matching purposes. 344 */ 345 static final class StringMatcher extends StrMatcher { 346 /** The string to match, as a character array. */ 347 private final char[] chars; 348 349 /** 350 * Constructor that creates a matcher from a String. 351 * 352 * @param str the string to match, must not be null 353 */ 354 StringMatcher(final String str) { 355 super(); 356 chars = str.toCharArray(); 357 } 358 359 /** 360 * Returns whether or not the given text matches the stored string. 361 * 362 * @param buffer the text content to match against, do not change 363 * @param pos the starting position for the match, valid for buffer 364 * @param bufferStart the first active index in the buffer, valid for buffer 365 * @param bufferEnd the end index of the active buffer, valid for buffer 366 * @return the number of matching characters, zero for no match 367 */ 368 @Override 369 public int isMatch(final char[] buffer, int pos, final int bufferStart, final int bufferEnd) { 370 final int len = chars.length; 371 if (pos + len > bufferEnd) { 372 return 0; 373 } 374 for (int i = 0; i < chars.length; i++, pos++) { 375 if (chars[i] != buffer[pos]) { 376 return 0; 377 } 378 } 379 return len; 380 } 381 382 @Override 383 public String toString() { 384 return super.toString() + ' ' + Arrays.toString(chars); 385 } 386 387 } 388 389 //----------------------------------------------------------------------- 390 /** 391 * Class used to match no characters. 392 */ 393 static final class NoMatcher extends StrMatcher { 394 395 /** 396 * Constructs a new instance of {@code NoMatcher}. 397 */ 398 NoMatcher() { 399 super(); 400 } 401 402 /** 403 * Always returns {@code false}. 404 * 405 * @param buffer the text content to match against, do not change 406 * @param pos the starting position for the match, valid for buffer 407 * @param bufferStart the first active index in the buffer, valid for buffer 408 * @param bufferEnd the end index of the active buffer, valid for buffer 409 * @return the number of matching characters, zero for no match 410 */ 411 @Override 412 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) { 413 return 0; 414 } 415 } 416 417 //----------------------------------------------------------------------- 418 /** 419 * Class used to match whitespace as per trim(). 420 */ 421 static final class TrimMatcher extends StrMatcher { 422 423 /** 424 * Constructs a new instance of {@code TrimMatcher}. 425 */ 426 TrimMatcher() { 427 super(); 428 } 429 430 /** 431 * Returns whether or not the given character matches. 432 * 433 * @param buffer the text content to match against, do not change 434 * @param pos the starting position for the match, valid for buffer 435 * @param bufferStart the first active index in the buffer, valid for buffer 436 * @param bufferEnd the end index of the active buffer, valid for buffer 437 * @return the number of matching characters, zero for no match 438 */ 439 @Override 440 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) { 441 return buffer[pos] <= 32 ? 1 : 0; 442 } 443 } 444 445}