001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.lang3.text; 018 019import java.util.Arrays; 020 021import org.apache.commons.lang3.StringUtils; 022 023/** 024 * A matcher class that can be queried to determine if a character array 025 * portion matches. 026 * <p> 027 * This class comes complete with various factory methods. 028 * If these do not suffice, you can subclass and implement your own matcher. 029 * 030 * @since 2.2 031 */ 032public abstract class StrMatcher { 033 034 /** 035 * Matches the comma character. 036 */ 037 private static final StrMatcher COMMA_MATCHER = new CharMatcher(','); 038 /** 039 * Matches the tab character. 040 */ 041 private static final StrMatcher TAB_MATCHER = new CharMatcher('\t'); 042 /** 043 * Matches the space character. 044 */ 045 private static final StrMatcher SPACE_MATCHER = new CharMatcher(' '); 046 /** 047 * Matches the same characters as StringTokenizer, 048 * namely space, tab, newline, formfeed. 049 */ 050 private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray()); 051 /** 052 * Matches the String trim() whitespace characters. 053 */ 054 private static final StrMatcher TRIM_MATCHER = new TrimMatcher(); 055 /** 056 * Matches the double quote character. 057 */ 058 private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\''); 059 /** 060 * Matches the double quote character. 061 */ 062 private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"'); 063 /** 064 * Matches the single or double quote character. 065 */ 066 private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray()); 067 /** 068 * Matches no characters. 069 */ 070 private static final StrMatcher NONE_MATCHER = new NoMatcher(); 071 072 // ----------------------------------------------------------------------- 073 074 /** 075 * Returns a matcher which matches the comma character. 076 * 077 * @return a matcher for a comma 078 */ 079 public static StrMatcher commaMatcher() { 080 return COMMA_MATCHER; 081 } 082 083 /** 084 * Returns a matcher which matches the tab character. 085 * 086 * @return a matcher for a tab 087 */ 088 public static StrMatcher tabMatcher() { 089 return TAB_MATCHER; 090 } 091 092 /** 093 * Returns a matcher which matches the space character. 094 * 095 * @return a matcher for a space 096 */ 097 public static StrMatcher spaceMatcher() { 098 return SPACE_MATCHER; 099 } 100 101 /** 102 * Matches the same characters as StringTokenizer, 103 * namely space, tab, newline and formfeed. 104 * 105 * @return the split matcher 106 */ 107 public static StrMatcher splitMatcher() { 108 return SPLIT_MATCHER; 109 } 110 111 /** 112 * Matches the String trim() whitespace characters. 113 * 114 * @return the trim matcher 115 */ 116 public static StrMatcher trimMatcher() { 117 return TRIM_MATCHER; 118 } 119 120 /** 121 * Returns a matcher which matches the single quote character. 122 * 123 * @return a matcher for a single quote 124 */ 125 public static StrMatcher singleQuoteMatcher() { 126 return SINGLE_QUOTE_MATCHER; 127 } 128 129 /** 130 * Returns a matcher which matches the double quote character. 131 * 132 * @return a matcher for a double quote 133 */ 134 public static StrMatcher doubleQuoteMatcher() { 135 return DOUBLE_QUOTE_MATCHER; 136 } 137 138 /** 139 * Returns a matcher which matches the single or double quote character. 140 * 141 * @return a matcher for a single or double quote 142 */ 143 public static StrMatcher quoteMatcher() { 144 return QUOTE_MATCHER; 145 } 146 147 /** 148 * Matches no characters. 149 * 150 * @return a matcher that matches nothing 151 */ 152 public static StrMatcher noneMatcher() { 153 return NONE_MATCHER; 154 } 155 156 /** 157 * Constructor that creates a matcher from a character. 158 * 159 * @param ch the character to match, must not be null 160 * @return a new Matcher for the given char 161 */ 162 public static StrMatcher charMatcher(final char ch) { 163 return new CharMatcher(ch); 164 } 165 166 /** 167 * Constructor that creates a matcher from a set of characters. 168 * 169 * @param chars the characters to match, null or empty matches nothing 170 * @return a new matcher for the given char[] 171 */ 172 public static StrMatcher charSetMatcher(final char... chars) { 173 if (chars == null || chars.length == 0) { 174 return NONE_MATCHER; 175 } 176 if (chars.length == 1) { 177 return new CharMatcher(chars[0]); 178 } 179 return new CharSetMatcher(chars); 180 } 181 182 /** 183 * Constructor that creates a matcher from a string representing a set of characters. 184 * 185 * @param chars the characters to match, null or empty matches nothing 186 * @return a new Matcher for the given characters 187 */ 188 public static StrMatcher charSetMatcher(final String chars) { 189 if (StringUtils.isEmpty(chars)) { 190 return NONE_MATCHER; 191 } 192 if (chars.length() == 1) { 193 return new CharMatcher(chars.charAt(0)); 194 } 195 return new CharSetMatcher(chars.toCharArray()); 196 } 197 198 /** 199 * Constructor that creates a matcher from a string. 200 * 201 * @param str the string to match, null or empty matches nothing 202 * @return a new Matcher for the given String 203 */ 204 public static StrMatcher stringMatcher(final String str) { 205 if (StringUtils.isEmpty(str)) { 206 return NONE_MATCHER; 207 } 208 return new StringMatcher(str); 209 } 210 211 //----------------------------------------------------------------------- 212 /** 213 * Constructor. 214 */ 215 protected StrMatcher() { 216 super(); 217 } 218 219 /** 220 * Returns the number of matching characters, zero for no match. 221 * <p> 222 * This method is called to check for a match. 223 * The parameter <code>pos</code> represents the current position to be 224 * checked in the string <code>buffer</code> (a character array which must 225 * not be changed). 226 * The API guarantees that <code>pos</code> is a valid index for <code>buffer</code>. 227 * <p> 228 * The character array may be larger than the active area to be matched. 229 * Only values in the buffer between the specified indices may be accessed. 230 * <p> 231 * The matching code may check one character or many. 232 * It may check characters preceding <code>pos</code> as well as those 233 * after, so long as no checks exceed the bounds specified. 234 * <p> 235 * It must return zero for no match, or a positive number if a match was found. 236 * The number indicates the number of characters that matched. 237 * 238 * @param buffer the text content to match against, do not change 239 * @param pos the starting position for the match, valid for buffer 240 * @param bufferStart the first active index in the buffer, valid for buffer 241 * @param bufferEnd the end index (exclusive) of the active buffer, valid for buffer 242 * @return the number of matching characters, zero for no match 243 */ 244 public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd); 245 246 /** 247 * Returns the number of matching characters, zero for no match. 248 * <p> 249 * This method is called to check for a match. 250 * The parameter <code>pos</code> represents the current position to be 251 * checked in the string <code>buffer</code> (a character array which must 252 * not be changed). 253 * The API guarantees that <code>pos</code> is a valid index for <code>buffer</code>. 254 * <p> 255 * The matching code may check one character or many. 256 * It may check characters preceding <code>pos</code> as well as those after. 257 * <p> 258 * It must return zero for no match, or a positive number if a match was found. 259 * The number indicates the number of characters that matched. 260 * 261 * @param buffer the text content to match against, do not change 262 * @param pos the starting position for the match, valid for buffer 263 * @return the number of matching characters, zero for no match 264 * @since 2.4 265 */ 266 public int isMatch(final char[] buffer, final int pos) { 267 return isMatch(buffer, pos, 0, buffer.length); 268 } 269 270 //----------------------------------------------------------------------- 271 /** 272 * Class used to define a set of characters for matching purposes. 273 */ 274 static final class CharSetMatcher extends StrMatcher { 275 /** The set of characters to match. */ 276 private final char[] chars; 277 278 /** 279 * Constructor that creates a matcher from a character array. 280 * 281 * @param chars the characters to match, must not be null 282 */ 283 CharSetMatcher(final char chars[]) { 284 super(); 285 this.chars = chars.clone(); 286 Arrays.sort(this.chars); 287 } 288 289 /** 290 * Returns whether or not the given character matches. 291 * 292 * @param buffer the text content to match against, do not change 293 * @param pos the starting position for the match, valid for buffer 294 * @param bufferStart the first active index in the buffer, valid for buffer 295 * @param bufferEnd the end index of the active buffer, valid for buffer 296 * @return the number of matching characters, zero for no match 297 */ 298 @Override 299 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) { 300 return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0; 301 } 302 } 303 304 //----------------------------------------------------------------------- 305 /** 306 * Class used to define a character for matching purposes. 307 */ 308 static final class CharMatcher extends StrMatcher { 309 /** The character to match. */ 310 private final char ch; 311 312 /** 313 * Constructor that creates a matcher that matches a single character. 314 * 315 * @param ch the character to match 316 */ 317 CharMatcher(final char ch) { 318 super(); 319 this.ch = ch; 320 } 321 322 /** 323 * Returns whether or not the given character matches. 324 * 325 * @param buffer the text content to match against, do not change 326 * @param pos the starting position for the match, valid for buffer 327 * @param bufferStart the first active index in the buffer, valid for buffer 328 * @param bufferEnd the end index of the active buffer, valid for buffer 329 * @return the number of matching characters, zero for no match 330 */ 331 @Override 332 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) { 333 return ch == buffer[pos] ? 1 : 0; 334 } 335 } 336 337 //----------------------------------------------------------------------- 338 /** 339 * Class used to define a set of characters for matching purposes. 340 */ 341 static final class StringMatcher extends StrMatcher { 342 /** The string to match, as a character array. */ 343 private final char[] chars; 344 345 /** 346 * Constructor that creates a matcher from a String. 347 * 348 * @param str the string to match, must not be null 349 */ 350 StringMatcher(final String str) { 351 super(); 352 chars = str.toCharArray(); 353 } 354 355 /** 356 * Returns whether or not the given text matches the stored string. 357 * 358 * @param buffer the text content to match against, do not change 359 * @param pos the starting position for the match, valid for buffer 360 * @param bufferStart the first active index in the buffer, valid for buffer 361 * @param bufferEnd the end index of the active buffer, valid for buffer 362 * @return the number of matching characters, zero for no match 363 */ 364 @Override 365 public int isMatch(final char[] buffer, int pos, final int bufferStart, final int bufferEnd) { 366 final int len = chars.length; 367 if (pos + len > bufferEnd) { 368 return 0; 369 } 370 for (int i = 0; i < chars.length; i++, pos++) { 371 if (chars[i] != buffer[pos]) { 372 return 0; 373 } 374 } 375 return len; 376 } 377 378 @Override 379 public String toString() { 380 return super.toString() + ' ' + Arrays.toString(chars); 381 } 382 383 } 384 385 //----------------------------------------------------------------------- 386 /** 387 * Class used to match no characters. 388 */ 389 static final class NoMatcher extends StrMatcher { 390 391 /** 392 * Constructs a new instance of <code>NoMatcher</code>. 393 */ 394 NoMatcher() { 395 super(); 396 } 397 398 /** 399 * Always returns <code>false</code>. 400 * 401 * @param buffer the text content to match against, do not change 402 * @param pos the starting position for the match, valid for buffer 403 * @param bufferStart the first active index in the buffer, valid for buffer 404 * @param bufferEnd the end index of the active buffer, valid for buffer 405 * @return the number of matching characters, zero for no match 406 */ 407 @Override 408 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) { 409 return 0; 410 } 411 } 412 413 //----------------------------------------------------------------------- 414 /** 415 * Class used to match whitespace as per trim(). 416 */ 417 static final class TrimMatcher extends StrMatcher { 418 419 /** 420 * Constructs a new instance of <code>TrimMatcher</code>. 421 */ 422 TrimMatcher() { 423 super(); 424 } 425 426 /** 427 * Returns whether or not the given character matches. 428 * 429 * @param buffer the text content to match against, do not change 430 * @param pos the starting position for the match, valid for buffer 431 * @param bufferStart the first active index in the buffer, valid for buffer 432 * @param bufferEnd the end index of the active buffer, valid for buffer 433 * @return the number of matching characters, zero for no match 434 */ 435 @Override 436 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) { 437 return buffer[pos] <= 32 ? 1 : 0; 438 } 439 } 440 441}