001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.lang3.text; 018 019import java.util.Arrays; 020 021import org.apache.commons.lang3.StringUtils; 022 023/** 024 * A matcher class that can be queried to determine if a character array 025 * portion matches. 026 * <p> 027 * This class comes complete with various factory methods. 028 * If these do not suffice, you can subclass and implement your own matcher. 029 * 030 * @since 2.2 031 * @version $Id: StrMatcher.java 1436770 2013-01-22 07:09:45Z ggregory $ 032 */ 033public abstract class StrMatcher { 034 035 /** 036 * Matches the comma character. 037 */ 038 private static final StrMatcher COMMA_MATCHER = new CharMatcher(','); 039 /** 040 * Matches the tab character. 041 */ 042 private static final StrMatcher TAB_MATCHER = new CharMatcher('\t'); 043 /** 044 * Matches the space character. 045 */ 046 private static final StrMatcher SPACE_MATCHER = new CharMatcher(' '); 047 /** 048 * Matches the same characters as StringTokenizer, 049 * namely space, tab, newline, formfeed. 050 */ 051 private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray()); 052 /** 053 * Matches the String trim() whitespace characters. 054 */ 055 private static final StrMatcher TRIM_MATCHER = new TrimMatcher(); 056 /** 057 * Matches the double quote character. 058 */ 059 private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\''); 060 /** 061 * Matches the double quote character. 062 */ 063 private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"'); 064 /** 065 * Matches the single or double quote character. 066 */ 067 private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray()); 068 /** 069 * Matches no characters. 070 */ 071 private static final StrMatcher NONE_MATCHER = new NoMatcher(); 072 073 // ----------------------------------------------------------------------- 074 075 /** 076 * Returns a matcher which matches the comma character. 077 * 078 * @return a matcher for a comma 079 */ 080 public static StrMatcher commaMatcher() { 081 return COMMA_MATCHER; 082 } 083 084 /** 085 * Returns a matcher which matches the tab character. 086 * 087 * @return a matcher for a tab 088 */ 089 public static StrMatcher tabMatcher() { 090 return TAB_MATCHER; 091 } 092 093 /** 094 * Returns a matcher which matches the space character. 095 * 096 * @return a matcher for a space 097 */ 098 public static StrMatcher spaceMatcher() { 099 return SPACE_MATCHER; 100 } 101 102 /** 103 * Matches the same characters as StringTokenizer, 104 * namely space, tab, newline and formfeed. 105 * 106 * @return the split matcher 107 */ 108 public static StrMatcher splitMatcher() { 109 return SPLIT_MATCHER; 110 } 111 112 /** 113 * Matches the String trim() whitespace characters. 114 * 115 * @return the trim matcher 116 */ 117 public static StrMatcher trimMatcher() { 118 return TRIM_MATCHER; 119 } 120 121 /** 122 * Returns a matcher which matches the single quote character. 123 * 124 * @return a matcher for a single quote 125 */ 126 public static StrMatcher singleQuoteMatcher() { 127 return SINGLE_QUOTE_MATCHER; 128 } 129 130 /** 131 * Returns a matcher which matches the double quote character. 132 * 133 * @return a matcher for a double quote 134 */ 135 public static StrMatcher doubleQuoteMatcher() { 136 return DOUBLE_QUOTE_MATCHER; 137 } 138 139 /** 140 * Returns a matcher which matches the single or double quote character. 141 * 142 * @return a matcher for a single or double quote 143 */ 144 public static StrMatcher quoteMatcher() { 145 return QUOTE_MATCHER; 146 } 147 148 /** 149 * Matches no characters. 150 * 151 * @return a matcher that matches nothing 152 */ 153 public static StrMatcher noneMatcher() { 154 return NONE_MATCHER; 155 } 156 157 /** 158 * Constructor that creates a matcher from a character. 159 * 160 * @param ch the character to match, must not be null 161 * @return a new Matcher for the given char 162 */ 163 public static StrMatcher charMatcher(final char ch) { 164 return new CharMatcher(ch); 165 } 166 167 /** 168 * Constructor that creates a matcher from a set of characters. 169 * 170 * @param chars the characters to match, null or empty matches nothing 171 * @return a new matcher for the given char[] 172 */ 173 public static StrMatcher charSetMatcher(final char... chars) { 174 if (chars == null || chars.length == 0) { 175 return NONE_MATCHER; 176 } 177 if (chars.length == 1) { 178 return new CharMatcher(chars[0]); 179 } 180 return new CharSetMatcher(chars); 181 } 182 183 /** 184 * Constructor that creates a matcher from a string representing a set of characters. 185 * 186 * @param chars the characters to match, null or empty matches nothing 187 * @return a new Matcher for the given characters 188 */ 189 public static StrMatcher charSetMatcher(final String chars) { 190 if (StringUtils.isEmpty(chars)) { 191 return NONE_MATCHER; 192 } 193 if (chars.length() == 1) { 194 return new CharMatcher(chars.charAt(0)); 195 } 196 return new CharSetMatcher(chars.toCharArray()); 197 } 198 199 /** 200 * Constructor that creates a matcher from a string. 201 * 202 * @param str the string to match, null or empty matches nothing 203 * @return a new Matcher for the given String 204 */ 205 public static StrMatcher stringMatcher(final String str) { 206 if (StringUtils.isEmpty(str)) { 207 return NONE_MATCHER; 208 } 209 return new StringMatcher(str); 210 } 211 212 //----------------------------------------------------------------------- 213 /** 214 * Constructor. 215 */ 216 protected StrMatcher() { 217 super(); 218 } 219 220 /** 221 * Returns the number of matching characters, zero for no match. 222 * <p> 223 * This method is called to check for a match. 224 * The parameter <code>pos</code> represents the current position to be 225 * checked in the string <code>buffer</code> (a character array which must 226 * not be changed). 227 * The API guarantees that <code>pos</code> is a valid index for <code>buffer</code>. 228 * <p> 229 * The character array may be larger than the active area to be matched. 230 * Only values in the buffer between the specifed indices may be accessed. 231 * <p> 232 * The matching code may check one character or many. 233 * It may check characters preceding <code>pos</code> as well as those 234 * after, so long as no checks exceed the bounds specified. 235 * <p> 236 * It must return zero for no match, or a positive number if a match was found. 237 * The number indicates the number of characters that matched. 238 * 239 * @param buffer the text content to match against, do not change 240 * @param pos the starting position for the match, valid for buffer 241 * @param bufferStart the first active index in the buffer, valid for buffer 242 * @param bufferEnd the end index (exclusive) of the active buffer, valid for buffer 243 * @return the number of matching characters, zero for no match 244 */ 245 public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd); 246 247 /** 248 * Returns the number of matching characters, zero for no match. 249 * <p> 250 * This method is called to check for a match. 251 * The parameter <code>pos</code> represents the current position to be 252 * checked in the string <code>buffer</code> (a character array which must 253 * not be changed). 254 * The API guarantees that <code>pos</code> is a valid index for <code>buffer</code>. 255 * <p> 256 * The matching code may check one character or many. 257 * It may check characters preceding <code>pos</code> as well as those after. 258 * <p> 259 * It must return zero for no match, or a positive number if a match was found. 260 * The number indicates the number of characters that matched. 261 * 262 * @param buffer the text content to match against, do not change 263 * @param pos the starting position for the match, valid for buffer 264 * @return the number of matching characters, zero for no match 265 * @since 2.4 266 */ 267 public int isMatch(final char[] buffer, final int pos) { 268 return isMatch(buffer, pos, 0, buffer.length); 269 } 270 271 //----------------------------------------------------------------------- 272 /** 273 * Class used to define a set of characters for matching purposes. 274 */ 275 static final class CharSetMatcher extends StrMatcher { 276 /** The set of characters to match. */ 277 private final char[] chars; 278 279 /** 280 * Constructor that creates a matcher from a character array. 281 * 282 * @param chars the characters to match, must not be null 283 */ 284 CharSetMatcher(final char chars[]) { 285 super(); 286 this.chars = chars.clone(); 287 Arrays.sort(this.chars); 288 } 289 290 /** 291 * Returns whether or not the given character matches. 292 * 293 * @param buffer the text content to match against, do not change 294 * @param pos the starting position for the match, valid for buffer 295 * @param bufferStart the first active index in the buffer, valid for buffer 296 * @param bufferEnd the end index of the active buffer, valid for buffer 297 * @return the number of matching characters, zero for no match 298 */ 299 @Override 300 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) { 301 return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0; 302 } 303 } 304 305 //----------------------------------------------------------------------- 306 /** 307 * Class used to define a character for matching purposes. 308 */ 309 static final class CharMatcher extends StrMatcher { 310 /** The character to match. */ 311 private final char ch; 312 313 /** 314 * Constructor that creates a matcher that matches a single character. 315 * 316 * @param ch the character to match 317 */ 318 CharMatcher(final char ch) { 319 super(); 320 this.ch = ch; 321 } 322 323 /** 324 * Returns whether or not the given character matches. 325 * 326 * @param buffer the text content to match against, do not change 327 * @param pos the starting position for the match, valid for buffer 328 * @param bufferStart the first active index in the buffer, valid for buffer 329 * @param bufferEnd the end index of the active buffer, valid for buffer 330 * @return the number of matching characters, zero for no match 331 */ 332 @Override 333 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) { 334 return ch == buffer[pos] ? 1 : 0; 335 } 336 } 337 338 //----------------------------------------------------------------------- 339 /** 340 * Class used to define a set of characters for matching purposes. 341 */ 342 static final class StringMatcher extends StrMatcher { 343 /** The string to match, as a character array. */ 344 private final char[] chars; 345 346 /** 347 * Constructor that creates a matcher from a String. 348 * 349 * @param str the string to match, must not be null 350 */ 351 StringMatcher(final String str) { 352 super(); 353 chars = str.toCharArray(); 354 } 355 356 /** 357 * Returns whether or not the given text matches the stored string. 358 * 359 * @param buffer the text content to match against, do not change 360 * @param pos the starting position for the match, valid for buffer 361 * @param bufferStart the first active index in the buffer, valid for buffer 362 * @param bufferEnd the end index of the active buffer, valid for buffer 363 * @return the number of matching characters, zero for no match 364 */ 365 @Override 366 public int isMatch(final char[] buffer, int pos, final int bufferStart, final int bufferEnd) { 367 final int len = chars.length; 368 if (pos + len > bufferEnd) { 369 return 0; 370 } 371 for (int i = 0; i < chars.length; i++, pos++) { 372 if (chars[i] != buffer[pos]) { 373 return 0; 374 } 375 } 376 return len; 377 } 378 } 379 380 //----------------------------------------------------------------------- 381 /** 382 * Class used to match no characters. 383 */ 384 static final class NoMatcher extends StrMatcher { 385 386 /** 387 * Constructs a new instance of <code>NoMatcher</code>. 388 */ 389 NoMatcher() { 390 super(); 391 } 392 393 /** 394 * Always returns <code>false</code>. 395 * 396 * @param buffer the text content to match against, do not change 397 * @param pos the starting position for the match, valid for buffer 398 * @param bufferStart the first active index in the buffer, valid for buffer 399 * @param bufferEnd the end index of the active buffer, valid for buffer 400 * @return the number of matching characters, zero for no match 401 */ 402 @Override 403 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) { 404 return 0; 405 } 406 } 407 408 //----------------------------------------------------------------------- 409 /** 410 * Class used to match whitespace as per trim(). 411 */ 412 static final class TrimMatcher extends StrMatcher { 413 414 /** 415 * Constructs a new instance of <code>TrimMatcher</code>. 416 */ 417 TrimMatcher() { 418 super(); 419 } 420 421 /** 422 * Returns whether or not the given character matches. 423 * 424 * @param buffer the text content to match against, do not change 425 * @param pos the starting position for the match, valid for buffer 426 * @param bufferStart the first active index in the buffer, valid for buffer 427 * @param bufferEnd the end index of the active buffer, valid for buffer 428 * @return the number of matching characters, zero for no match 429 */ 430 @Override 431 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) { 432 return buffer[pos] <= 32 ? 1 : 0; 433 } 434 } 435 436}