/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17  package org.apache.commons.lang3;
18  
19  import java.util.regex.Matcher;
20  import java.util.regex.Pattern;
21  
/**
 * Helpers to process Strings using regular expressions.
 *
 * <p>All methods are static. The {@code remove*} and {@code replace*} methods return the input text
 * unchanged when the text, the regular expression or the replacement is {@code null}.</p>
 *
 * @see java.util.regex.Pattern
 * @since 3.8
 */
public class RegExUtils {

    /**
     * Compiles the given regular expression into a pattern with the {@link Pattern#DOTALL} flag.
     *
     * @param regex The expression to be compiled
     * @return the given regular expression compiled into a pattern with the {@link Pattern#DOTALL} flag.
     * @throws java.util.regex.PatternSyntaxException
     *             if the regular expression's syntax is invalid
     * @since 3.13.0
     */
    public static Pattern dotAll(final String regex) {
        return Pattern.compile(regex, Pattern.DOTALL);
    }

    /**
     * Compiles the given regular expression into a pattern with the {@link Pattern#DOTALL} flag, then creates a
     * matcher that will match the given text against this pattern.
     *
     * @param regex The expression to be compiled.
     * @param text  The character sequence to be matched.
     * @return A new matcher for this pattern.
     * @throws java.util.regex.PatternSyntaxException
     *             if the regular expression's syntax is invalid
     * @since 3.13.0
     */
    public static Matcher dotAllMatcher(final String regex, final String text) {
        return dotAll(regex).matcher(text);
    }

    /**
     * Removes each substring of the text String that matches the given regular expression pattern.
     *
     * <p>This method is a {@code null} safe equivalent to:</p>
     * <ul>
     *  <li>{@code pattern.matcher(text).replaceAll("")}</li>
     * </ul>
     *
     * <p>A {@code null} reference passed to this method is a no-op.</p>
     *
     * <pre>{@code
     * RegExUtils.removeAll(null, *)      = null
     * RegExUtils.removeAll("any", (Pattern) null)  = "any"
     * RegExUtils.removeAll("any", Pattern.compile(""))    = "any"
     * RegExUtils.removeAll("any", Pattern.compile(".*"))  = ""
     * RegExUtils.removeAll("any", Pattern.compile(".+"))  = ""
     * RegExUtils.removeAll("abc", Pattern.compile(".?"))  = ""
     * RegExUtils.removeAll("A<__>\n<__>B", Pattern.compile("<.*>"))      = "A\nB"
     * RegExUtils.removeAll("A<__>\n<__>B", Pattern.compile("(?s)<.*>"))  = "AB"
     * RegExUtils.removeAll("A<__>\n<__>B", Pattern.compile("<.*>", Pattern.DOTALL))  = "AB"
     * RegExUtils.removeAll("ABCabc123abc", Pattern.compile("[a-z]"))     = "ABC123"
     * }</pre>
     *
     * @param text  text to remove from, may be null
     * @param regex  the regular expression pattern to which this string is to be matched, may be null
     * @return  the text with any removes processed,
     *              {@code null} if null String input
     *
     * @see #replaceAll(String, Pattern, String)
     * @see java.util.regex.Matcher#replaceAll(String)
     * @see java.util.regex.Pattern
     */
    public static String removeAll(final String text, final Pattern regex) {
        return replaceAll(text, regex, "");
    }

    /**
     * Removes each substring of the text String that matches the given regular expression.
     *
     * <p>This method is a {@code null} safe equivalent to:</p>
     * <ul>
     *  <li>{@code text.replaceAll(regex, "")}</li>
     *  <li>{@code Pattern.compile(regex).matcher(text).replaceAll("")}</li>
     * </ul>
     *
     * <p>A {@code null} reference passed to this method is a no-op.</p>
     *
     * <p>Unlike in the {@link #removePattern(String, String)} method, the {@link Pattern#DOTALL} option
     * is NOT automatically added.
     * To use the DOTALL option prepend {@code "(?s)"} to the regex.
     * DOTALL is also known as single-line mode in Perl.</p>
     *
     * <pre>{@code
     * RegExUtils.removeAll(null, *)      = null
     * RegExUtils.removeAll("any", (String) null)  = "any"
     * RegExUtils.removeAll("any", "")    = "any"
     * RegExUtils.removeAll("any", ".*")  = ""
     * RegExUtils.removeAll("any", ".+")  = ""
     * RegExUtils.removeAll("abc", ".?")  = ""
     * RegExUtils.removeAll("A<__>\n<__>B", "<.*>")      = "A\nB"
     * RegExUtils.removeAll("A<__>\n<__>B", "(?s)<.*>")  = "AB"
     * RegExUtils.removeAll("ABCabc123abc", "[a-z]")     = "ABC123"
     * }</pre>
     *
     * @param text  text to remove from, may be null
     * @param regex  the regular expression to which this string is to be matched, may be null
     * @return  the text with any removes processed,
     *              {@code null} if null String input
     *
     * @throws  java.util.regex.PatternSyntaxException
     *              if the regular expression's syntax is invalid
     *
     * @see #replaceAll(String, String, String)
     * @see #removePattern(String, String)
     * @see String#replaceAll(String, String)
     * @see java.util.regex.Pattern
     * @see java.util.regex.Pattern#DOTALL
     */
    public static String removeAll(final String text, final String regex) {
        return replaceAll(text, regex, "");
    }

    /**
     * Removes the first substring of the text string that matches the given regular expression pattern.
     *
     * <p>This method is a {@code null} safe equivalent to:</p>
     * <ul>
     *  <li>{@code pattern.matcher(text).replaceFirst("")}</li>
     * </ul>
     *
     * <p>A {@code null} reference passed to this method is a no-op.</p>
     *
     * <pre>{@code
     * RegExUtils.removeFirst(null, *)      = null
     * RegExUtils.removeFirst("any", (Pattern) null)  = "any"
     * RegExUtils.removeFirst("any", Pattern.compile(""))    = "any"
     * RegExUtils.removeFirst("any", Pattern.compile(".*"))  = ""
     * RegExUtils.removeFirst("any", Pattern.compile(".+"))  = ""
     * RegExUtils.removeFirst("abc", Pattern.compile(".?"))  = "bc"
     * RegExUtils.removeFirst("A<__>\n<__>B", Pattern.compile("<.*>"))      = "A\n<__>B"
     * RegExUtils.removeFirst("A<__>\n<__>B", Pattern.compile("(?s)<.*>"))  = "AB"
     * RegExUtils.removeFirst("ABCabc123", Pattern.compile("[a-z]"))          = "ABCbc123"
     * RegExUtils.removeFirst("ABCabc123abc", Pattern.compile("[a-z]+"))      = "ABC123abc"
     * }</pre>
     *
     * @param text  text to remove from, may be null
     * @param regex  the regular expression pattern to which this string is to be matched, may be null
     * @return  the text with the first replacement processed,
     *              {@code null} if null String input
     *
     * @see #replaceFirst(String, Pattern, String)
     * @see java.util.regex.Matcher#replaceFirst(String)
     * @see java.util.regex.Pattern
     */
    public static String removeFirst(final String text, final Pattern regex) {
        return replaceFirst(text, regex, "");
    }

    /**
     * Removes the first substring of the text string that matches the given regular expression.
     *
     * <p>This method is a {@code null} safe equivalent to:</p>
     * <ul>
     *  <li>{@code text.replaceFirst(regex, "")}</li>
     *  <li>{@code Pattern.compile(regex).matcher(text).replaceFirst("")}</li>
     * </ul>
     *
     * <p>A {@code null} reference passed to this method is a no-op.</p>
     *
     * <p>The {@link Pattern#DOTALL} option is NOT automatically added.
     * To use the DOTALL option prepend {@code "(?s)"} to the regex.
     * DOTALL is also known as single-line mode in Perl.</p>
     *
     * <pre>{@code
     * RegExUtils.removeFirst(null, *)      = null
     * RegExUtils.removeFirst("any", (String) null)  = "any"
     * RegExUtils.removeFirst("any", "")    = "any"
     * RegExUtils.removeFirst("any", ".*")  = ""
     * RegExUtils.removeFirst("any", ".+")  = ""
     * RegExUtils.removeFirst("abc", ".?")  = "bc"
     * RegExUtils.removeFirst("A<__>\n<__>B", "<.*>")      = "A\n<__>B"
     * RegExUtils.removeFirst("A<__>\n<__>B", "(?s)<.*>")  = "AB"
     * RegExUtils.removeFirst("ABCabc123", "[a-z]")          = "ABCbc123"
     * RegExUtils.removeFirst("ABCabc123abc", "[a-z]+")      = "ABC123abc"
     * }</pre>
     *
     * @param text  text to remove from, may be null
     * @param regex  the regular expression to which this string is to be matched, may be null
     * @return  the text with the first replacement processed,
     *              {@code null} if null String input
     *
     * @throws  java.util.regex.PatternSyntaxException
     *              if the regular expression's syntax is invalid
     *
     * @see #replaceFirst(String, String, String)
     * @see String#replaceFirst(String, String)
     * @see java.util.regex.Pattern
     * @see java.util.regex.Pattern#DOTALL
     */
    public static String removeFirst(final String text, final String regex) {
        return replaceFirst(text, regex, "");
    }

    /**
     * Removes each substring of the source String that matches the given regular expression using the DOTALL option.
     *
     * <p>This call is a {@code null} safe equivalent to:</p>
     * <ul>
     * <li>{@code text.replaceAll("(?s)" + regex, "")}</li>
     * <li>{@code Pattern.compile(regex, Pattern.DOTALL).matcher(text).replaceAll("")}</li>
     * </ul>
     *
     * <p>A {@code null} reference passed to this method is a no-op.</p>
     *
     * <pre>{@code
     * RegExUtils.removePattern(null, *)       = null
     * RegExUtils.removePattern("any", (String) null)   = "any"
     * RegExUtils.removePattern("A<__>\n<__>B", "<.*>")  = "AB"
     * RegExUtils.removePattern("ABCabc123", "[a-z]")    = "ABC123"
     * }</pre>
     *
     * @param text
     *            the source string, may be null
     * @param regex
     *            the regular expression to which this string is to be matched, may be null
     * @return The resulting {@link String}, {@code null} if null String input
     * @throws java.util.regex.PatternSyntaxException
     *             if the regular expression's syntax is invalid
     * @see #replacePattern(String, String, String)
     * @see String#replaceAll(String, String)
     * @see Pattern#DOTALL
     */
    public static String removePattern(final String text, final String regex) {
        return replacePattern(text, regex, "");
    }

    /**
     * Replaces each substring of the text String that matches the given regular expression pattern with the given
     * replacement.
     *
     * <p>This method is a {@code null} safe equivalent to:</p>
     * <ul>
     *  <li>{@code pattern.matcher(text).replaceAll(replacement)}</li>
     * </ul>
     *
     * <p>A {@code null} reference passed to this method is a no-op.</p>
     *
     * <p>Dollar signs and backslashes in the replacement are interpreted as described in
     * {@link Matcher#replaceAll(String)}; use {@link Matcher#quoteReplacement(String)} for a literal replacement.</p>
     *
     * <pre>{@code
     * RegExUtils.replaceAll(null, *, *)       = null
     * RegExUtils.replaceAll("any", (Pattern) null, *)   = "any"
     * RegExUtils.replaceAll("any", *, null)   = "any"
     * RegExUtils.replaceAll("", Pattern.compile(""), "zzz")    = "zzz"
     * RegExUtils.replaceAll("", Pattern.compile(".*"), "zzz")  = "zzz"
     * RegExUtils.replaceAll("", Pattern.compile(".+"), "zzz")  = ""
     * RegExUtils.replaceAll("abc", Pattern.compile(""), "ZZ")  = "ZZaZZbZZcZZ"
     * RegExUtils.replaceAll("<__>\n<__>", Pattern.compile("<.*>"), "z")                 = "z\nz"
     * RegExUtils.replaceAll("<__>\n<__>", Pattern.compile("<.*>", Pattern.DOTALL), "z") = "z"
     * RegExUtils.replaceAll("<__>\n<__>", Pattern.compile("(?s)<.*>"), "z")             = "z"
     * RegExUtils.replaceAll("ABCabc123", Pattern.compile("[a-z]"), "_")       = "ABC___123"
     * RegExUtils.replaceAll("ABCabc123", Pattern.compile("[^A-Z0-9]+"), "_")  = "ABC_123"
     * RegExUtils.replaceAll("ABCabc123", Pattern.compile("[^A-Z0-9]+"), "")   = "ABC123"
     * RegExUtils.replaceAll("Lorem ipsum  dolor   sit", Pattern.compile("( +)([a-z]+)"), "_$2")  = "Lorem_ipsum_dolor_sit"
     * }</pre>
     *
     * @param text  text to search and replace in, may be null
     * @param regex  the regular expression pattern to which this string is to be matched, may be null
     * @param replacement  the string to be substituted for each match, may be null
     * @return  the text with any replacements processed,
     *              {@code null} if null String input
     *
     * @see java.util.regex.Matcher#replaceAll(String)
     * @see java.util.regex.Pattern
     */
    public static String replaceAll(final String text, final Pattern regex, final String replacement) {
        // Explicit comparisons, as in replaceFirst: no varargs array is allocated per call.
        if (text == null || regex == null || replacement == null) {
            return text;
        }
        return regex.matcher(text).replaceAll(replacement);
    }

    /**
     * Replaces each substring of the text String that matches the given regular expression
     * with the given replacement.
     *
     * <p>This method is a {@code null} safe equivalent to:</p>
     * <ul>
     *  <li>{@code text.replaceAll(regex, replacement)}</li>
     *  <li>{@code Pattern.compile(regex).matcher(text).replaceAll(replacement)}</li>
     * </ul>
     *
     * <p>A {@code null} reference passed to this method is a no-op.</p>
     *
     * <p>Unlike in the {@link #replacePattern(String, String, String)} method, the {@link Pattern#DOTALL} option
     * is NOT automatically added.
     * To use the DOTALL option prepend {@code "(?s)"} to the regex.
     * DOTALL is also known as single-line mode in Perl.</p>
     *
     * <p>Dollar signs and backslashes in the replacement are interpreted as described in
     * {@link Matcher#replaceAll(String)}; use {@link Matcher#quoteReplacement(String)} for a literal replacement.</p>
     *
     * <pre>{@code
     * RegExUtils.replaceAll(null, *, *)       = null
     * RegExUtils.replaceAll("any", (String) null, *)   = "any"
     * RegExUtils.replaceAll("any", *, null)   = "any"
     * RegExUtils.replaceAll("", "", "zzz")    = "zzz"
     * RegExUtils.replaceAll("", ".*", "zzz")  = "zzz"
     * RegExUtils.replaceAll("", ".+", "zzz")  = ""
     * RegExUtils.replaceAll("abc", "", "ZZ")  = "ZZaZZbZZcZZ"
     * RegExUtils.replaceAll("<__>\n<__>", "<.*>", "z")      = "z\nz"
     * RegExUtils.replaceAll("<__>\n<__>", "(?s)<.*>", "z")  = "z"
     * RegExUtils.replaceAll("ABCabc123", "[a-z]", "_")       = "ABC___123"
     * RegExUtils.replaceAll("ABCabc123", "[^A-Z0-9]+", "_")  = "ABC_123"
     * RegExUtils.replaceAll("ABCabc123", "[^A-Z0-9]+", "")   = "ABC123"
     * RegExUtils.replaceAll("Lorem ipsum  dolor   sit", "( +)([a-z]+)", "_$2")  = "Lorem_ipsum_dolor_sit"
     * }</pre>
     *
     * @param text  text to search and replace in, may be null
     * @param regex  the regular expression to which this string is to be matched, may be null
     * @param replacement  the string to be substituted for each match, may be null
     * @return  the text with any replacements processed,
     *              {@code null} if null String input
     *
     * @throws  java.util.regex.PatternSyntaxException
     *              if the regular expression's syntax is invalid
     *
     * @see #replacePattern(String, String, String)
     * @see String#replaceAll(String, String)
     * @see java.util.regex.Pattern
     * @see java.util.regex.Pattern#DOTALL
     */
    public static String replaceAll(final String text, final String regex, final String replacement) {
        if (text == null || regex == null || replacement == null) {
            return text;
        }
        return text.replaceAll(regex, replacement);
    }

    /**
     * Replaces the first substring of the text string that matches the given regular expression pattern
     * with the given replacement.
     *
     * <p>This method is a {@code null} safe equivalent to:</p>
     * <ul>
     *  <li>{@code pattern.matcher(text).replaceFirst(replacement)}</li>
     * </ul>
     *
     * <p>A {@code null} reference passed to this method is a no-op.</p>
     *
     * <p>Dollar signs and backslashes in the replacement are interpreted as described in
     * {@link Matcher#replaceFirst(String)}; use {@link Matcher#quoteReplacement(String)} for a literal
     * replacement.</p>
     *
     * <pre>{@code
     * RegExUtils.replaceFirst(null, *, *)       = null
     * RegExUtils.replaceFirst("any", (Pattern) null, *)   = "any"
     * RegExUtils.replaceFirst("any", *, null)   = "any"
     * RegExUtils.replaceFirst("", Pattern.compile(""), "zzz")    = "zzz"
     * RegExUtils.replaceFirst("", Pattern.compile(".*"), "zzz")  = "zzz"
     * RegExUtils.replaceFirst("", Pattern.compile(".+"), "zzz")  = ""
     * RegExUtils.replaceFirst("abc", Pattern.compile(""), "ZZ")  = "ZZabc"
     * RegExUtils.replaceFirst("<__>\n<__>", Pattern.compile("<.*>"), "z")      = "z\n<__>"
     * RegExUtils.replaceFirst("<__>\n<__>", Pattern.compile("(?s)<.*>"), "z")  = "z"
     * RegExUtils.replaceFirst("ABCabc123", Pattern.compile("[a-z]"), "_")          = "ABC_bc123"
     * RegExUtils.replaceFirst("ABCabc123abc", Pattern.compile("[^A-Z0-9]+"), "_")  = "ABC_123abc"
     * RegExUtils.replaceFirst("ABCabc123abc", Pattern.compile("[^A-Z0-9]+"), "")   = "ABC123abc"
     * RegExUtils.replaceFirst("Lorem ipsum  dolor   sit", Pattern.compile("( +)([a-z]+)"), "_$2")  = "Lorem_ipsum  dolor   sit"
     * }</pre>
     *
     * @param text  text to search and replace in, may be null
     * @param regex  the regular expression pattern to which this string is to be matched, may be null
     * @param replacement  the string to be substituted for the first match, may be null
     * @return  the text with the first replacement processed,
     *              {@code null} if null String input
     *
     * @see java.util.regex.Matcher#replaceFirst(String)
     * @see java.util.regex.Pattern
     */
    public static String replaceFirst(final String text, final Pattern regex, final String replacement) {
        if (text == null || regex == null || replacement == null) {
            return text;
        }
        return regex.matcher(text).replaceFirst(replacement);
    }

    /**
     * Replaces the first substring of the text string that matches the given regular expression
     * with the given replacement.
     *
     * <p>This method is a {@code null} safe equivalent to:</p>
     * <ul>
     *  <li>{@code text.replaceFirst(regex, replacement)}</li>
     *  <li>{@code Pattern.compile(regex).matcher(text).replaceFirst(replacement)}</li>
     * </ul>
     *
     * <p>A {@code null} reference passed to this method is a no-op.</p>
     *
     * <p>The {@link Pattern#DOTALL} option is NOT automatically added.
     * To use the DOTALL option prepend {@code "(?s)"} to the regex.
     * DOTALL is also known as single-line mode in Perl.</p>
     *
     * <p>Dollar signs and backslashes in the replacement are interpreted as described in
     * {@link Matcher#replaceFirst(String)}; use {@link Matcher#quoteReplacement(String)} for a literal
     * replacement.</p>
     *
     * <pre>{@code
     * RegExUtils.replaceFirst(null, *, *)       = null
     * RegExUtils.replaceFirst("any", (String) null, *)   = "any"
     * RegExUtils.replaceFirst("any", *, null)   = "any"
     * RegExUtils.replaceFirst("", "", "zzz")    = "zzz"
     * RegExUtils.replaceFirst("", ".*", "zzz")  = "zzz"
     * RegExUtils.replaceFirst("", ".+", "zzz")  = ""
     * RegExUtils.replaceFirst("abc", "", "ZZ")  = "ZZabc"
     * RegExUtils.replaceFirst("<__>\n<__>", "<.*>", "z")      = "z\n<__>"
     * RegExUtils.replaceFirst("<__>\n<__>", "(?s)<.*>", "z")  = "z"
     * RegExUtils.replaceFirst("ABCabc123", "[a-z]", "_")          = "ABC_bc123"
     * RegExUtils.replaceFirst("ABCabc123abc", "[^A-Z0-9]+", "_")  = "ABC_123abc"
     * RegExUtils.replaceFirst("ABCabc123abc", "[^A-Z0-9]+", "")   = "ABC123abc"
     * RegExUtils.replaceFirst("Lorem ipsum  dolor   sit", "( +)([a-z]+)", "_$2")  = "Lorem_ipsum  dolor   sit"
     * }</pre>
     *
     * @param text  text to search and replace in, may be null
     * @param regex  the regular expression to which this string is to be matched, may be null
     * @param replacement  the string to be substituted for the first match, may be null
     * @return  the text with the first replacement processed,
     *              {@code null} if null String input
     *
     * @throws  java.util.regex.PatternSyntaxException
     *              if the regular expression's syntax is invalid
     *
     * @see String#replaceFirst(String, String)
     * @see java.util.regex.Pattern
     * @see java.util.regex.Pattern#DOTALL
     */
    public static String replaceFirst(final String text, final String regex, final String replacement) {
        if (text == null || regex == null || replacement == null) {
            return text;
        }
        return text.replaceFirst(regex, replacement);
    }

    /**
     * Replaces each substring of the source String that matches the given regular expression with the given
     * replacement using the {@link Pattern#DOTALL} option. DOTALL is also known as single-line mode in Perl.
     *
     * <p>This call is a {@code null} safe equivalent to:</p>
     * <ul>
     * <li>{@code text.replaceAll("(?s)" + regex, replacement)}</li>
     * <li>{@code Pattern.compile(regex, Pattern.DOTALL).matcher(text).replaceAll(replacement)}</li>
     * </ul>
     *
     * <p>A {@code null} reference passed to this method is a no-op.</p>
     *
     * <p>Dollar signs and backslashes in the replacement are interpreted as described in
     * {@link Matcher#replaceAll(String)}; use {@link Matcher#quoteReplacement(String)} for a literal replacement.</p>
     *
     * <pre>{@code
     * RegExUtils.replacePattern(null, *, *)       = null
     * RegExUtils.replacePattern("any", (String) null, *)   = "any"
     * RegExUtils.replacePattern("any", *, null)   = "any"
     * RegExUtils.replacePattern("", "", "zzz")    = "zzz"
     * RegExUtils.replacePattern("", ".*", "zzz")  = "zzz"
     * RegExUtils.replacePattern("", ".+", "zzz")  = ""
     * RegExUtils.replacePattern("<__>\n<__>", "<.*>", "z")       = "z"
     * RegExUtils.replacePattern("ABCabc123", "[a-z]", "_")       = "ABC___123"
     * RegExUtils.replacePattern("ABCabc123", "[^A-Z0-9]+", "_")  = "ABC_123"
     * RegExUtils.replacePattern("ABCabc123", "[^A-Z0-9]+", "")   = "ABC123"
     * RegExUtils.replacePattern("Lorem ipsum  dolor   sit", "( +)([a-z]+)", "_$2")  = "Lorem_ipsum_dolor_sit"
     * }</pre>
     *
     * @param text
     *            the source string, may be null
     * @param regex
     *            the regular expression to which this string is to be matched, may be null
     * @param replacement
     *            the string to be substituted for each match, may be null
     * @return The resulting {@link String}, {@code null} if null String input
     * @throws java.util.regex.PatternSyntaxException
     *             if the regular expression's syntax is invalid
     * @see #replaceAll(String, String, String)
     * @see String#replaceAll(String, String)
     * @see Pattern#DOTALL
     */
    public static String replacePattern(final String text, final String regex, final String replacement) {
        if (text == null || regex == null || replacement == null) {
            return text;
        }
        return dotAllMatcher(regex, text).replaceAll(replacement);
    }

    /**
     * Constructs a new instance. Every other member of this class is static, so an instance is not
     * needed to use it.
     *
     * @deprecated TODO Make private in 4.0.
     */
    @Deprecated
    public RegExUtils() {
        // empty
    }
}