/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.lang3;

import java.util.regex.Pattern;

/**
 * <p>Helpers to process Strings using regular expressions.</p>
 * @see java.util.regex.Pattern
 * @since 3.8
 */
public class RegExUtils {

    /**
     * <p>Removes each substring of the text String that matches the given regular expression pattern.</p>
     *
     * This method is a {@code null} safe equivalent to:
     * <ul>
     *  <li>{@code pattern.matcher(text).replaceAll(StringUtils.EMPTY)}</li>
     * </ul>
     *
     * <p>A {@code null} reference passed to this method is a no-op.</p>
     *
     * <pre>
     * RegExUtils.removeAll(null, *)      = null
     * RegExUtils.removeAll("any", (Pattern) null)  = "any"
     * RegExUtils.removeAll("any", Pattern.compile(""))    = "any"
     * RegExUtils.removeAll("any", Pattern.compile(".*"))  = ""
     * RegExUtils.removeAll("any", Pattern.compile(".+"))  = ""
     * RegExUtils.removeAll("abc", Pattern.compile(".?"))  = ""
     * RegExUtils.removeAll("A&lt;__&gt;\n&lt;__&gt;B", Pattern.compile("&lt;.*&gt;"))      = "A\nB"
     * RegExUtils.removeAll("A&lt;__&gt;\n&lt;__&gt;B", Pattern.compile("(?s)&lt;.*&gt;"))  = "AB"
     * RegExUtils.removeAll("A&lt;__&gt;\n&lt;__&gt;B", Pattern.compile("&lt;.*&gt;", Pattern.DOTALL))  = "AB"
     * RegExUtils.removeAll("ABCabc123abc", Pattern.compile("[a-z]"))     = "ABC123"
     * </pre>
     *
     * @param text  text to remove from, may be null
     * @param regex  the regular expression to which this string is to be matched
     * @return  the text with any removes processed,
     *              {@code null} if null String input
     *
     * @see #replaceAll(String, Pattern, String)
     * @see java.util.regex.Matcher#replaceAll(String)
     * @see java.util.regex.Pattern
     */
    public static String removeAll(final String text, final Pattern regex) {
        return replaceAll(text, regex, StringUtils.EMPTY);
    }

    /**
     * <p>Removes each substring of the text String that matches the given regular expression.</p>
     *
     * This method is a {@code null} safe equivalent to:
     * <ul>
     *  <li>{@code text.replaceAll(regex, StringUtils.EMPTY)}</li>
     *  <li>{@code Pattern.compile(regex).matcher(text).replaceAll(StringUtils.EMPTY)}</li>
     * </ul>
     *
     * <p>A {@code null} reference passed to this method is a no-op.</p>
     *
     * <p>Unlike in the {@link #removePattern(String, String)} method, the {@link Pattern#DOTALL} option
     * is NOT automatically added.
     * To use the DOTALL option prepend {@code "(?s)"} to the regex.
     * DOTALL is also known as single-line mode in Perl.</p>
     *
     * <pre>
     * RegExUtils.removeAll(null, *)      = null
     * RegExUtils.removeAll("any", (String) null)  = "any"
     * RegExUtils.removeAll("any", "")    = "any"
     * RegExUtils.removeAll("any", ".*")  = ""
     * RegExUtils.removeAll("any", ".+")  = ""
     * RegExUtils.removeAll("abc", ".?")  = ""
     * RegExUtils.removeAll("A&lt;__&gt;\n&lt;__&gt;B", "&lt;.*&gt;")      = "A\nB"
     * RegExUtils.removeAll("A&lt;__&gt;\n&lt;__&gt;B", "(?s)&lt;.*&gt;")  = "AB"
     * RegExUtils.removeAll("ABCabc123abc", "[a-z]")     = "ABC123"
     * </pre>
     *
     * @param text  text to remove from, may be null
     * @param regex  the regular expression to which this string is to be matched
     * @return  the text with any removes processed,
     *              {@code null} if null String input
     *
     * @throws  java.util.regex.PatternSyntaxException
     *              if the regular expression's syntax is invalid
     *
     * @see #replaceAll(String, String, String)
     * @see #removePattern(String, String)
     * @see String#replaceAll(String, String)
     * @see java.util.regex.Pattern
     * @see java.util.regex.Pattern#DOTALL
     */
    public static String removeAll(final String text, final String regex) {
        return replaceAll(text, regex, StringUtils.EMPTY);
    }

    /**
     * <p>Removes the first substring of the text string that matches the given regular expression pattern.</p>
     *
     * This method is a {@code null} safe equivalent to:
     * <ul>
     *  <li>{@code pattern.matcher(text).replaceFirst(StringUtils.EMPTY)}</li>
     * </ul>
     *
     * <p>A {@code null} reference passed to this method is a no-op.</p>
     *
     * <pre>
     * RegExUtils.removeFirst(null, *)      = null
     * RegExUtils.removeFirst("any", (Pattern) null)  = "any"
     * RegExUtils.removeFirst("any", Pattern.compile(""))    = "any"
     * RegExUtils.removeFirst("any", Pattern.compile(".*"))  = ""
     * RegExUtils.removeFirst("any", Pattern.compile(".+"))  = ""
     * RegExUtils.removeFirst("abc", Pattern.compile(".?"))  = "bc"
     * RegExUtils.removeFirst("A&lt;__&gt;\n&lt;__&gt;B", Pattern.compile("&lt;.*&gt;"))      = "A\n&lt;__&gt;B"
     * RegExUtils.removeFirst("A&lt;__&gt;\n&lt;__&gt;B", Pattern.compile("(?s)&lt;.*&gt;"))  = "AB"
     * RegExUtils.removeFirst("ABCabc123", Pattern.compile("[a-z]"))          = "ABCbc123"
     * RegExUtils.removeFirst("ABCabc123abc", Pattern.compile("[a-z]+"))      = "ABC123abc"
     * </pre>
     *
     * @param text  text to remove from, may be null
     * @param regex  the regular expression pattern to which this string is to be matched
     * @return  the text with the first replacement processed,
     *              {@code null} if null String input
     *
     * @see #replaceFirst(String, Pattern, String)
     * @see java.util.regex.Matcher#replaceFirst(String)
     * @see java.util.regex.Pattern
     */
    public static String removeFirst(final String text, final Pattern regex) {
        return replaceFirst(text, regex, StringUtils.EMPTY);
    }

    /**
     * <p>Removes the first substring of the text string that matches the given regular expression.</p>
     *
     * This method is a {@code null} safe equivalent to:
     * <ul>
     *  <li>{@code text.replaceFirst(regex, StringUtils.EMPTY)}</li>
     *  <li>{@code Pattern.compile(regex).matcher(text).replaceFirst(StringUtils.EMPTY)}</li>
     * </ul>
     *
     * <p>A {@code null} reference passed to this method is a no-op.</p>
     *
     * <p>The {@link Pattern#DOTALL} option is NOT automatically added.
     * To use the DOTALL option prepend {@code "(?s)"} to the regex.
     * DOTALL is also known as single-line mode in Perl.</p>
     *
     * <pre>
     * RegExUtils.removeFirst(null, *)      = null
     * RegExUtils.removeFirst("any", (String) null)  = "any"
     * RegExUtils.removeFirst("any", "")    = "any"
     * RegExUtils.removeFirst("any", ".*")  = ""
     * RegExUtils.removeFirst("any", ".+")  = ""
     * RegExUtils.removeFirst("abc", ".?")  = "bc"
     * RegExUtils.removeFirst("A&lt;__&gt;\n&lt;__&gt;B", "&lt;.*&gt;")      = "A\n&lt;__&gt;B"
     * RegExUtils.removeFirst("A&lt;__&gt;\n&lt;__&gt;B", "(?s)&lt;.*&gt;")  = "AB"
     * RegExUtils.removeFirst("ABCabc123", "[a-z]")          = "ABCbc123"
     * RegExUtils.removeFirst("ABCabc123abc", "[a-z]+")      = "ABC123abc"
     * </pre>
     *
     * @param text  text to remove from, may be null
     * @param regex  the regular expression to which this string is to be matched
     * @return  the text with the first replacement processed,
     *              {@code null} if null String input
     *
     * @throws  java.util.regex.PatternSyntaxException
     *              if the regular expression's syntax is invalid
     *
     * @see #replaceFirst(String, String, String)
     * @see String#replaceFirst(String, String)
     * @see java.util.regex.Pattern
     * @see java.util.regex.Pattern#DOTALL
     */
    public static String removeFirst(final String text, final String regex) {
        return replaceFirst(text, regex, StringUtils.EMPTY);
    }

    /**
     * <p>Removes each substring of the source String that matches the given regular expression using the DOTALL option.</p>
     *
     * This call is a {@code null} safe equivalent to:
     * <ul>
     * <li>{@code text.replaceAll("(?s)" + regex, StringUtils.EMPTY)}</li>
     * <li>{@code Pattern.compile(regex, Pattern.DOTALL).matcher(text).replaceAll(StringUtils.EMPTY)}</li>
     * </ul>
     *
     * <p>A {@code null} reference passed to this method is a no-op.</p>
     *
     * <pre>
     * RegExUtils.removePattern(null, *)       = null
     * RegExUtils.removePattern("any", (String) null)   = "any"
     * RegExUtils.removePattern("A&lt;__&gt;\n&lt;__&gt;B", "&lt;.*&gt;")  = "AB"
     * RegExUtils.removePattern("ABCabc123", "[a-z]")    = "ABC123"
     * </pre>
     *
     * @param text
     *            the source string
     * @param regex
     *            the regular expression to which this string is to be matched
     * @return The resulting {@code String}
     * @see #replacePattern(String, String, String)
     * @see String#replaceAll(String, String)
     * @see Pattern#DOTALL
     */
    public static String removePattern(final String text, final String regex) {
        return replacePattern(text, regex, StringUtils.EMPTY);
    }

    /**
     * <p>Replaces each substring of the text String that matches the given regular expression pattern with the given replacement.</p>
     *
     * This method is a {@code null} safe equivalent to:
     * <ul>
     *  <li>{@code pattern.matcher(text).replaceAll(replacement)}</li>
     * </ul>
     *
     * <p>A {@code null} reference passed to this method is a no-op.</p>
     *
     * <pre>
     * RegExUtils.replaceAll(null, *, *)       = null
     * RegExUtils.replaceAll("any", (Pattern) null, *)   = "any"
     * RegExUtils.replaceAll("any", *, null)   = "any"
     * RegExUtils.replaceAll("", Pattern.compile(""), "zzz")    = "zzz"
     * RegExUtils.replaceAll("", Pattern.compile(".*"), "zzz")  = "zzz"
     * RegExUtils.replaceAll("", Pattern.compile(".+"), "zzz")  = ""
     * RegExUtils.replaceAll("abc", Pattern.compile(""), "ZZ")  = "ZZaZZbZZcZZ"
     * RegExUtils.replaceAll("&lt;__&gt;\n&lt;__&gt;", Pattern.compile("&lt;.*&gt;"), "z")                 = "z\nz"
     * RegExUtils.replaceAll("&lt;__&gt;\n&lt;__&gt;", Pattern.compile("&lt;.*&gt;", Pattern.DOTALL), "z") = "z"
     * RegExUtils.replaceAll("&lt;__&gt;\n&lt;__&gt;", Pattern.compile("(?s)&lt;.*&gt;"), "z")             = "z"
     * RegExUtils.replaceAll("ABCabc123", Pattern.compile("[a-z]"), "_")       = "ABC___123"
     * RegExUtils.replaceAll("ABCabc123", Pattern.compile("[^A-Z0-9]+"), "_")  = "ABC_123"
     * RegExUtils.replaceAll("ABCabc123", Pattern.compile("[^A-Z0-9]+"), "")   = "ABC123"
     * RegExUtils.replaceAll("Lorem ipsum  dolor   sit", Pattern.compile("( +)([a-z]+)"), "_$2")  = "Lorem_ipsum_dolor_sit"
     * </pre>
     *
     * @param text  text to search and replace in, may be null
     * @param regex  the regular expression pattern to which this string is to be matched
     * @param replacement  the string to be substituted for each match
     * @return  the text with any replacements processed,
     *              {@code null} if null String input
     *
     * @see java.util.regex.Matcher#replaceAll(String)
     * @see java.util.regex.Pattern
     */
    public static String replaceAll(final String text, final Pattern regex, final String replacement) {
        // Any null argument makes the call a no-op: the input text is handed back untouched.
        if (text == null || regex == null || replacement == null) {
            return text;
        }
        return regex.matcher(text).replaceAll(replacement);
    }

    /**
     * <p>Replaces each substring of the text String that matches the given regular expression
     * with the given replacement.</p>
     *
     * This method is a {@code null} safe equivalent to:
     * <ul>
     *  <li>{@code text.replaceAll(regex, replacement)}</li>
     *  <li>{@code Pattern.compile(regex).matcher(text).replaceAll(replacement)}</li>
     * </ul>
     *
     * <p>A {@code null} reference passed to this method is a no-op.</p>
     *
     * <p>Unlike in the {@link #replacePattern(String, String, String)} method, the {@link Pattern#DOTALL} option
     * is NOT automatically added.
     * To use the DOTALL option prepend {@code "(?s)"} to the regex.
     * DOTALL is also known as single-line mode in Perl.</p>
     *
     * <pre>
     * RegExUtils.replaceAll(null, *, *)       = null
     * RegExUtils.replaceAll("any", (String) null, *)   = "any"
     * RegExUtils.replaceAll("any", *, null)   = "any"
     * RegExUtils.replaceAll("", "", "zzz")    = "zzz"
     * RegExUtils.replaceAll("", ".*", "zzz")  = "zzz"
     * RegExUtils.replaceAll("", ".+", "zzz")  = ""
     * RegExUtils.replaceAll("abc", "", "ZZ")  = "ZZaZZbZZcZZ"
     * RegExUtils.replaceAll("&lt;__&gt;\n&lt;__&gt;", "&lt;.*&gt;", "z")      = "z\nz"
     * RegExUtils.replaceAll("&lt;__&gt;\n&lt;__&gt;", "(?s)&lt;.*&gt;", "z")  = "z"
     * RegExUtils.replaceAll("ABCabc123", "[a-z]", "_")       = "ABC___123"
     * RegExUtils.replaceAll("ABCabc123", "[^A-Z0-9]+", "_")  = "ABC_123"
     * RegExUtils.replaceAll("ABCabc123", "[^A-Z0-9]+", "")   = "ABC123"
     * RegExUtils.replaceAll("Lorem ipsum  dolor   sit", "( +)([a-z]+)", "_$2")  = "Lorem_ipsum_dolor_sit"
     * </pre>
     *
     * @param text  text to search and replace in, may be null
     * @param regex  the regular expression to which this string is to be matched
     * @param replacement  the string to be substituted for each match
     * @return  the text with any replacements processed,
     *              {@code null} if null String input
     *
     * @throws  java.util.regex.PatternSyntaxException
     *              if the regular expression's syntax is invalid
     *
     * @see #replacePattern(String, String, String)
     * @see String#replaceAll(String, String)
     * @see java.util.regex.Pattern
     * @see java.util.regex.Pattern#DOTALL
     */
    public static String replaceAll(final String text, final String regex, final String replacement) {
        // The null guard runs before the regex is compiled, so a null text is returned as-is
        // without the regex ever being validated.
        if (text == null || regex == null || replacement == null) {
            return text;
        }
        return text.replaceAll(regex, replacement);
    }

    /**
     * <p>Replaces the first substring of the text string that matches the given regular expression pattern
     * with the given replacement.</p>
     *
     * This method is a {@code null} safe equivalent to:
     * <ul>
     *  <li>{@code pattern.matcher(text).replaceFirst(replacement)}</li>
     * </ul>
     *
     * <p>A {@code null} reference passed to this method is a no-op.</p>
     *
     * <pre>
     * RegExUtils.replaceFirst(null, *, *)       = null
     * RegExUtils.replaceFirst("any", (Pattern) null, *)   = "any"
     * RegExUtils.replaceFirst("any", *, null)   = "any"
     * RegExUtils.replaceFirst("", Pattern.compile(""), "zzz")    = "zzz"
     * RegExUtils.replaceFirst("", Pattern.compile(".*"), "zzz")  = "zzz"
     * RegExUtils.replaceFirst("", Pattern.compile(".+"), "zzz")  = ""
     * RegExUtils.replaceFirst("abc", Pattern.compile(""), "ZZ")  = "ZZabc"
     * RegExUtils.replaceFirst("&lt;__&gt;\n&lt;__&gt;", Pattern.compile("&lt;.*&gt;"), "z")      = "z\n&lt;__&gt;"
     * RegExUtils.replaceFirst("&lt;__&gt;\n&lt;__&gt;", Pattern.compile("(?s)&lt;.*&gt;"), "z")  = "z"
     * RegExUtils.replaceFirst("ABCabc123", Pattern.compile("[a-z]"), "_")          = "ABC_bc123"
     * RegExUtils.replaceFirst("ABCabc123abc", Pattern.compile("[^A-Z0-9]+"), "_")  = "ABC_123abc"
     * RegExUtils.replaceFirst("ABCabc123abc", Pattern.compile("[^A-Z0-9]+"), "")   = "ABC123abc"
     * RegExUtils.replaceFirst("Lorem ipsum  dolor   sit", Pattern.compile("( +)([a-z]+)"), "_$2")  = "Lorem_ipsum  dolor   sit"
     * </pre>
     *
     * @param text  text to search and replace in, may be null
     * @param regex  the regular expression pattern to which this string is to be matched
     * @param replacement  the string to be substituted for the first match
     * @return  the text with the first replacement processed,
     *              {@code null} if null String input
     *
     * @see java.util.regex.Matcher#replaceFirst(String)
     * @see java.util.regex.Pattern
     */
    public static String replaceFirst(final String text, final Pattern regex, final String replacement) {
        // Any null argument makes the call a no-op: the input text is handed back untouched.
        if (text == null || regex == null || replacement == null) {
            return text;
        }
        return regex.matcher(text).replaceFirst(replacement);
    }

    /**
     * <p>Replaces the first substring of the text string that matches the given regular expression
     * with the given replacement.</p>
     *
     * This method is a {@code null} safe equivalent to:
     * <ul>
     *  <li>{@code text.replaceFirst(regex, replacement)}</li>
     *  <li>{@code Pattern.compile(regex).matcher(text).replaceFirst(replacement)}</li>
     * </ul>
     *
     * <p>A {@code null} reference passed to this method is a no-op.</p>
     *
     * <p>The {@link Pattern#DOTALL} option is NOT automatically added.
     * To use the DOTALL option prepend {@code "(?s)"} to the regex.
     * DOTALL is also known as single-line mode in Perl.</p>
     *
     * <pre>
     * RegExUtils.replaceFirst(null, *, *)       = null
     * RegExUtils.replaceFirst("any", (String) null, *)   = "any"
     * RegExUtils.replaceFirst("any", *, null)   = "any"
     * RegExUtils.replaceFirst("", "", "zzz")    = "zzz"
     * RegExUtils.replaceFirst("", ".*", "zzz")  = "zzz"
     * RegExUtils.replaceFirst("", ".+", "zzz")  = ""
     * RegExUtils.replaceFirst("abc", "", "ZZ")  = "ZZabc"
     * RegExUtils.replaceFirst("&lt;__&gt;\n&lt;__&gt;", "&lt;.*&gt;", "z")      = "z\n&lt;__&gt;"
     * RegExUtils.replaceFirst("&lt;__&gt;\n&lt;__&gt;", "(?s)&lt;.*&gt;", "z")  = "z"
     * RegExUtils.replaceFirst("ABCabc123", "[a-z]", "_")          = "ABC_bc123"
     * RegExUtils.replaceFirst("ABCabc123abc", "[^A-Z0-9]+", "_")  = "ABC_123abc"
     * RegExUtils.replaceFirst("ABCabc123abc", "[^A-Z0-9]+", "")   = "ABC123abc"
     * RegExUtils.replaceFirst("Lorem ipsum  dolor   sit", "( +)([a-z]+)", "_$2")  = "Lorem_ipsum  dolor   sit"
     * </pre>
     *
     * @param text  text to search and replace in, may be null
     * @param regex  the regular expression to which this string is to be matched
     * @param replacement  the string to be substituted for the first match
     * @return  the text with the first replacement processed,
     *              {@code null} if null String input
     *
     * @throws  java.util.regex.PatternSyntaxException
     *              if the regular expression's syntax is invalid
     *
     * @see String#replaceFirst(String, String)
     * @see java.util.regex.Pattern
     * @see java.util.regex.Pattern#DOTALL
     */
    public static String replaceFirst(final String text, final String regex, final String replacement) {
        // The null guard runs before the regex is compiled, so a null text is returned as-is
        // without the regex ever being validated.
        if (text == null || regex == null || replacement == null) {
            return text;
        }
        return text.replaceFirst(regex, replacement);
    }

    /**
     * <p>Replaces each substring of the source String that matches the given regular expression with the given
     * replacement using the {@link Pattern#DOTALL} option. DOTALL is also known as single-line mode in Perl.</p>
     *
     * This call is a {@code null} safe equivalent to:
     * <ul>
     * <li>{@code text.replaceAll("(?s)" + regex, replacement)}</li>
     * <li>{@code Pattern.compile(regex, Pattern.DOTALL).matcher(text).replaceAll(replacement)}</li>
     * </ul>
     *
     * <p>A {@code null} reference passed to this method is a no-op.</p>
     *
     * <pre>
     * RegExUtils.replacePattern(null, *, *)       = null
     * RegExUtils.replacePattern("any", (String) null, *)   = "any"
     * RegExUtils.replacePattern("any", *, null)   = "any"
     * RegExUtils.replacePattern("", "", "zzz")    = "zzz"
     * RegExUtils.replacePattern("", ".*", "zzz")  = "zzz"
     * RegExUtils.replacePattern("", ".+", "zzz")  = ""
     * RegExUtils.replacePattern("&lt;__&gt;\n&lt;__&gt;", "&lt;.*&gt;", "z")       = "z"
     * RegExUtils.replacePattern("ABCabc123", "[a-z]", "_")       = "ABC___123"
     * RegExUtils.replacePattern("ABCabc123", "[^A-Z0-9]+", "_")  = "ABC_123"
     * RegExUtils.replacePattern("ABCabc123", "[^A-Z0-9]+", "")   = "ABC123"
     * RegExUtils.replacePattern("Lorem ipsum  dolor   sit", "( +)([a-z]+)", "_$2")  = "Lorem_ipsum_dolor_sit"
     * </pre>
     *
     * @param text
     *            the source string
     * @param regex
     *            the regular expression to which this string is to be matched
     * @param replacement
     *            the string to be substituted for each match
     * @return The resulting {@code String}
     * @see #replaceAll(String, String, String)
     * @see String#replaceAll(String, String)
     * @see Pattern#DOTALL
     */
    public static String replacePattern(final String text, final String regex, final String replacement) {
        // Any null argument makes the call a no-op: the input text is handed back untouched.
        if (text == null || regex == null || replacement == null) {
            return text;
        }
        // DOTALL lets '.' match line terminators too, which is what distinguishes this
        // method from replaceAll(String, String, String).
        return Pattern.compile(regex, Pattern.DOTALL).matcher(text).replaceAll(replacement);
    }

}