/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.lang.text;

import java.util.Arrays;

/**
 * A matcher class that can be queried to determine if a character array
 * portion matches.
 * <p>
 * This class comes complete with various factory methods.
 * If these do not suffice, you can subclass and implement your own matcher.
 * <p>
 * All matchers defined in this file hold only final state that is never
 * modified after construction, which is why the factory methods can hand out
 * shared instances.
 *
 * @author Apache Software Foundation
 * @since 2.2
 * @version $Id: StrMatcher.java 905636 2010-02-02 14:03:32Z niallp $
 */
public abstract class StrMatcher {

    /**
     * Matches the comma character.
     */
    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');
    /**
     * Matches the tab character.
     */
    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');
    /**
     * Matches the space character.
     */
    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');
    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and formfeed.
     */
    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());
    /**
     * Matches the String trim() whitespace characters.
     */
    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();
    /**
     * Matches the single quote character.
     */
    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');
    /**
     * Matches the double quote character.
     */
    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');
    /**
     * Matches the single or double quote character.
     */
    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());
    /**
     * Matches no characters.
     */
    private static final StrMatcher NONE_MATCHER = new NoMatcher();

    // -----------------------------------------------------------------------

    /**
     * Returns a matcher which matches the comma character.
     *
     * @return a matcher for a comma
     */
    public static StrMatcher commaMatcher() {
        return COMMA_MATCHER;
    }

    /**
     * Returns a matcher which matches the tab character.
     *
     * @return a matcher for a tab
     */
    public static StrMatcher tabMatcher() {
        return TAB_MATCHER;
    }

    /**
     * Returns a matcher which matches the space character.
     *
     * @return a matcher for a space
     */
    public static StrMatcher spaceMatcher() {
        return SPACE_MATCHER;
    }

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and formfeed.
     *
     * @return the split matcher
     */
    public static StrMatcher splitMatcher() {
        return SPLIT_MATCHER;
    }

    /**
     * Matches the String trim() whitespace characters.
     *
     * @return the trim matcher
     */
    public static StrMatcher trimMatcher() {
        return TRIM_MATCHER;
    }

    /**
     * Returns a matcher which matches the single quote character.
     *
     * @return a matcher for a single quote
     */
    public static StrMatcher singleQuoteMatcher() {
        return SINGLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the double quote character.
     *
     * @return a matcher for a double quote
     */
    public static StrMatcher doubleQuoteMatcher() {
        return DOUBLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single or double quote character.
     *
     * @return a matcher for a single or double quote
     */
    public static StrMatcher quoteMatcher() {
        return QUOTE_MATCHER;
    }

    /**
     * Matches no characters.
     *
     * @return a matcher that matches nothing
     */
    public static StrMatcher noneMatcher() {
        return NONE_MATCHER;
    }

    /**
     * Factory method that creates a matcher from a character.
     *
     * @param ch  the character to match
     * @return a new Matcher for the given char
     */
    public static StrMatcher charMatcher(char ch) {
        return new CharMatcher(ch);
    }

    /**
     * Factory method that creates a matcher from a set of characters.
     * <p>
     * A single-element array yields a single-character matcher; a null or
     * empty array yields the shared matcher that matches nothing.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given char[]
     */
    public static StrMatcher charSetMatcher(char[] chars) {
        if (chars == null || chars.length == 0) {
            return NONE_MATCHER;
        }
        if (chars.length == 1) {
            return new CharMatcher(chars[0]);
        }
        return new CharSetMatcher(chars);
    }

    /**
     * Factory method that creates a matcher from a string representing a set
     * of characters.
     * <p>
     * A single-character string yields a single-character matcher; a null or
     * empty string yields the shared matcher that matches nothing.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given characters
     */
    public static StrMatcher charSetMatcher(String chars) {
        if (chars == null || chars.length() == 0) {
            return NONE_MATCHER;
        }
        if (chars.length() == 1) {
            return new CharMatcher(chars.charAt(0));
        }
        return new CharSetMatcher(chars.toCharArray());
    }

    /**
     * Factory method that creates a matcher from a string.
     * <p>
     * Unlike {@link #charSetMatcher(String)}, the returned matcher matches
     * the whole string as a sequence rather than any one of its characters.
     *
     * @param str  the string to match, null or empty matches nothing
     * @return a matcher for the given String
     */
    public static StrMatcher stringMatcher(String str) {
        if (str == null || str.length() == 0) {
            return NONE_MATCHER;
        }
        return new StringMatcher(str);
    }

    //-----------------------------------------------------------------------
    /**
     * Constructor.
     */
    protected StrMatcher() {
        super();
    }

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This method is called to check for a match.
     * The parameter <code>pos</code> represents the current position to be
     * checked in the string <code>buffer</code> (a character array which must
     * not be changed).
     * The API guarantees that <code>pos</code> is a valid index for <code>buffer</code>.
     * <p>
     * The character array may be larger than the active area to be matched.
     * Only values in the buffer between the specified indices may be accessed.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding <code>pos</code> as well as those
     * after, so long as no checks exceed the bounds specified.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @param bufferStart  the first active index in the buffer, valid for buffer
     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
     * @return the number of matching characters, zero for no match
     */
    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This method is called to check for a match.
     * The parameter <code>pos</code> represents the current position to be
     * checked in the string <code>buffer</code> (a character array which must
     * not be changed).
     * The API guarantees that <code>pos</code> is a valid index for <code>buffer</code>.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding <code>pos</code> as well as those after.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     * <p>
     * This implementation treats the whole array as the active area and
     * delegates to {@link #isMatch(char[], int, int, int)}.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @return the number of matching characters, zero for no match
     * @since 2.4
     */
    public int isMatch(char[] buffer, int pos) {
        return isMatch(buffer, pos, 0, buffer.length);
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a set of characters for matching purposes.
     */
    static final class CharSetMatcher extends StrMatcher {
        /** The set of characters to match, kept sorted for binary search. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a character array.
         * <p>
         * The array is copied, so later changes by the caller have no effect
         * and the caller's array is not reordered.
         *
         * @param chars  the characters to match, must not be null
         */
        CharSetMatcher(char[] chars) {
            super();
            // Cast kept so the source still compiles on pre-Java 5 compilers.
            this.chars = (char[]) chars.clone();
            Arrays.sort(this.chars);
        }

        /**
         * Returns whether or not the given character matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return one if the character at <code>pos</code> is in the set, zero otherwise
         */
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a character for matching purposes.
     */
    static final class CharMatcher extends StrMatcher {
        /** The character to match. */
        private final char ch;

        /**
         * Constructor that creates a matcher that matches a single character.
         *
         * @param ch  the character to match
         */
        CharMatcher(char ch) {
            super();
            this.ch = ch;
        }

        /**
         * Returns whether or not the given character matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return one if the character at <code>pos</code> equals the stored character, zero otherwise
         */
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            return ch == buffer[pos] ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a string (an exact sequence of characters) for
     * matching purposes.
     */
    static final class StringMatcher extends StrMatcher {
        /** The string to match, as a character array. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a String.
         *
         * @param str  the string to match, must not be null
         */
        StringMatcher(String str) {
            super();
            chars = str.toCharArray();
        }

        /**
         * Returns whether or not the given text matches the stored string.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return the length of the stored string if it occurs at <code>pos</code>
         *  and fits before <code>bufferEnd</code>, zero otherwise
         */
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            int len = chars.length;
            // The candidate must fit entirely inside the active area.
            if (pos + len > bufferEnd) {
                return 0;
            }
            for (int i = 0; i < len; i++, pos++) {
                if (chars[i] != buffer[pos]) {
                    return 0;
                }
            }
            return len;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match no characters.
     */
    static final class NoMatcher extends StrMatcher {

        /**
         * Constructs a new instance of <code>NoMatcher</code>.
         */
        NoMatcher() {
            super();
        }

        /**
         * Always returns zero, i.e. never matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return always zero
         */
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            return 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match whitespace as per trim().
     */
    static final class TrimMatcher extends StrMatcher {

        /**
         * Constructs a new instance of <code>TrimMatcher</code>.
         */
        TrimMatcher() {
            super();
        }

        /**
         * Returns whether or not the given character matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return one if the character at <code>pos</code> has a code of 32
         *  (space) or lower, zero otherwise
         */
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            // Same rule as String.trim(): everything up to and including ' '.
            return buffer[pos] <= 32 ? 1 : 0;
        }
    }

}