001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.lang3;
018
019import java.util.regex.Pattern;
020
021/**
022 * <p>Helpers to process Strings using regular expressions.</p>
023 * @see java.util.regex.Pattern
024 * @since 3.8
025 */
026public class RegExUtils {
027
028    /**
029     * <p>Removes each substring of the text String that matches the given regular expression pattern.</p>
030     *
031     * This method is a {@code null} safe equivalent to:
032     * <ul>
033     *  <li>{@code pattern.matcher(text).replaceAll(StringUtils.EMPTY)}</li>
034     * </ul>
035     *
036     * <p>A {@code null} reference passed to this method is a no-op.</p>
037     *
038     * <pre>
039     * StringUtils.removeAll(null, *)      = null
040     * StringUtils.removeAll("any", (Pattern) null)  = "any"
041     * StringUtils.removeAll("any", Pattern.compile(""))    = "any"
042     * StringUtils.removeAll("any", Pattern.compile(".*"))  = ""
043     * StringUtils.removeAll("any", Pattern.compile(".+"))  = ""
044     * StringUtils.removeAll("abc", Pattern.compile(".?"))  = ""
045     * StringUtils.removeAll("A&lt;__&gt;\n&lt;__&gt;B", Pattern.compile("&lt;.*&gt;"))      = "A\nB"
046     * StringUtils.removeAll("A&lt;__&gt;\n&lt;__&gt;B", Pattern.compile("(?s)&lt;.*&gt;"))  = "AB"
047     * StringUtils.removeAll("A&lt;__&gt;\n&lt;__&gt;B", Pattern.compile("&lt;.*&gt;", Pattern.DOTALL))  = "AB"
048     * StringUtils.removeAll("ABCabc123abc", Pattern.compile("[a-z]"))     = "ABC123"
049     * </pre>
050     *
051     * @param text  text to remove from, may be null
052     * @param regex  the regular expression to which this string is to be matched
053     * @return  the text with any removes processed,
054     *              {@code null} if null String input
055     *
056     * @see #replaceAll(String, Pattern, String)
057     * @see java.util.regex.Matcher#replaceAll(String)
058     * @see java.util.regex.Pattern
059     */
060    public static String removeAll(final String text, final Pattern regex) {
061        return replaceAll(text, regex, StringUtils.EMPTY);
062    }
063
064    /**
065     * <p>Removes each substring of the text String that matches the given regular expression.</p>
066     *
067     * This method is a {@code null} safe equivalent to:
068     * <ul>
069     *  <li>{@code text.replaceAll(regex, StringUtils.EMPTY)}</li>
070     *  <li>{@code Pattern.compile(regex).matcher(text).replaceAll(StringUtils.EMPTY)}</li>
071     * </ul>
072     *
073     * <p>A {@code null} reference passed to this method is a no-op.</p>
074     *
075     * <p>Unlike in the {@link #removePattern(String, String)} method, the {@link Pattern#DOTALL} option
076     * is NOT automatically added.
077     * To use the DOTALL option prepend <code>"(?s)"</code> to the regex.
078     * DOTALL is also known as single-line mode in Perl.</p>
079     *
080     * <pre>
081     * StringUtils.removeAll(null, *)      = null
082     * StringUtils.removeAll("any", (String) null)  = "any"
083     * StringUtils.removeAll("any", "")    = "any"
084     * StringUtils.removeAll("any", ".*")  = ""
085     * StringUtils.removeAll("any", ".+")  = ""
086     * StringUtils.removeAll("abc", ".?")  = ""
087     * StringUtils.removeAll("A&lt;__&gt;\n&lt;__&gt;B", "&lt;.*&gt;")      = "A\nB"
088     * StringUtils.removeAll("A&lt;__&gt;\n&lt;__&gt;B", "(?s)&lt;.*&gt;")  = "AB"
089     * StringUtils.removeAll("ABCabc123abc", "[a-z]")     = "ABC123"
090     * </pre>
091     *
092     * @param text  text to remove from, may be null
093     * @param regex  the regular expression to which this string is to be matched
094     * @return  the text with any removes processed,
095     *              {@code null} if null String input
096     *
097     * @throws  java.util.regex.PatternSyntaxException
098     *              if the regular expression's syntax is invalid
099     *
100     * @see #replaceAll(String, String, String)
101     * @see #removePattern(String, String)
102     * @see String#replaceAll(String, String)
103     * @see java.util.regex.Pattern
104     * @see java.util.regex.Pattern#DOTALL
105     */
106    public static String removeAll(final String text, final String regex) {
107        return replaceAll(text, regex, StringUtils.EMPTY);
108    }
109
110    /**
111     * <p>Removes the first substring of the text string that matches the given regular expression pattern.</p>
112     *
113     * This method is a {@code null} safe equivalent to:
114     * <ul>
115     *  <li>{@code pattern.matcher(text).replaceFirst(StringUtils.EMPTY)}</li>
116     * </ul>
117     *
118     * <p>A {@code null} reference passed to this method is a no-op.</p>
119     *
120     * <pre>
121     * StringUtils.removeFirst(null, *)      = null
122     * StringUtils.removeFirst("any", (Pattern) null)  = "any"
123     * StringUtils.removeFirst("any", Pattern.compile(""))    = "any"
124     * StringUtils.removeFirst("any", Pattern.compile(".*"))  = ""
125     * StringUtils.removeFirst("any", Pattern.compile(".+"))  = ""
126     * StringUtils.removeFirst("abc", Pattern.compile(".?"))  = "bc"
127     * StringUtils.removeFirst("A&lt;__&gt;\n&lt;__&gt;B", Pattern.compile("&lt;.*&gt;"))      = "A\n&lt;__&gt;B"
128     * StringUtils.removeFirst("A&lt;__&gt;\n&lt;__&gt;B", Pattern.compile("(?s)&lt;.*&gt;"))  = "AB"
129     * StringUtils.removeFirst("ABCabc123", Pattern.compile("[a-z]"))          = "ABCbc123"
130     * StringUtils.removeFirst("ABCabc123abc", Pattern.compile("[a-z]+"))      = "ABC123abc"
131     * </pre>
132     *
133     * @param text  text to remove from, may be null
134     * @param regex  the regular expression pattern to which this string is to be matched
135     * @return  the text with the first replacement processed,
136     *              {@code null} if null String input
137     *
138     * @see #replaceFirst(String, Pattern, String)
139     * @see java.util.regex.Matcher#replaceFirst(String)
140     * @see java.util.regex.Pattern
141     */
142    public static String removeFirst(final String text, final Pattern regex) {
143        return replaceFirst(text, regex, StringUtils.EMPTY);
144    }
145
146    /**
147     * <p>Removes the first substring of the text string that matches the given regular expression.</p>
148     *
149     * This method is a {@code null} safe equivalent to:
150     * <ul>
151     *  <li>{@code text.replaceFirst(regex, StringUtils.EMPTY)}</li>
152     *  <li>{@code Pattern.compile(regex).matcher(text).replaceFirst(StringUtils.EMPTY)}</li>
153     * </ul>
154     *
155     * <p>A {@code null} reference passed to this method is a no-op.</p>
156     *
157     * <p>The {@link Pattern#DOTALL} option is NOT automatically added.
158     * To use the DOTALL option prepend <code>"(?s)"</code> to the regex.
159     * DOTALL is also known as single-line mode in Perl.</p>
160     *
161     * <pre>
162     * StringUtils.removeFirst(null, *)      = null
163     * StringUtils.removeFirst("any", (String) null)  = "any"
164     * StringUtils.removeFirst("any", "")    = "any"
165     * StringUtils.removeFirst("any", ".*")  = ""
166     * StringUtils.removeFirst("any", ".+")  = ""
167     * StringUtils.removeFirst("abc", ".?")  = "bc"
168     * StringUtils.removeFirst("A&lt;__&gt;\n&lt;__&gt;B", "&lt;.*&gt;")      = "A\n&lt;__&gt;B"
169     * StringUtils.removeFirst("A&lt;__&gt;\n&lt;__&gt;B", "(?s)&lt;.*&gt;")  = "AB"
170     * StringUtils.removeFirst("ABCabc123", "[a-z]")          = "ABCbc123"
171     * StringUtils.removeFirst("ABCabc123abc", "[a-z]+")      = "ABC123abc"
172     * </pre>
173     *
174     * @param text  text to remove from, may be null
175     * @param regex  the regular expression to which this string is to be matched
176     * @return  the text with the first replacement processed,
177     *              {@code null} if null String input
178     *
179     * @throws  java.util.regex.PatternSyntaxException
180     *              if the regular expression's syntax is invalid
181     *
182     * @see #replaceFirst(String, String, String)
183     * @see String#replaceFirst(String, String)
184     * @see java.util.regex.Pattern
185     * @see java.util.regex.Pattern#DOTALL
186     */
187    public static String removeFirst(final String text, final String regex) {
188        return replaceFirst(text, regex, StringUtils.EMPTY);
189    }
190
191    /**
192     * <p>Removes each substring of the source String that matches the given regular expression using the DOTALL option.</p>
193     *
194     * This call is a {@code null} safe equivalent to:
195     * <ul>
196     * <li>{@code text.replaceAll(&quot;(?s)&quot; + regex, StringUtils.EMPTY)}</li>
197     * <li>{@code Pattern.compile(regex, Pattern.DOTALL).matcher(text).replaceAll(StringUtils.EMPTY)}</li>
198     * </ul>
199     *
200     * <p>A {@code null} reference passed to this method is a no-op.</p>
201     *
202     * <pre>
203     * StringUtils.removePattern(null, *)       = null
204     * StringUtils.removePattern("any", (String) null)   = "any"
205     * StringUtils.removePattern("A&lt;__&gt;\n&lt;__&gt;B", "&lt;.*&gt;")  = "AB"
206     * StringUtils.removePattern("ABCabc123", "[a-z]")    = "ABC123"
207     * </pre>
208     *
209     * @param text
210     *            the source string
211     * @param regex
212     *            the regular expression to which this string is to be matched
213     * @return The resulting {@code String}
214     * @see #replacePattern(String, String, String)
215     * @see String#replaceAll(String, String)
216     * @see Pattern#DOTALL
217     */
218    public static String removePattern(final String text, final String regex) {
219        return replacePattern(text, regex, StringUtils.EMPTY);
220    }
221
222    /**
223     * <p>Replaces each substring of the text String that matches the given regular expression pattern with the given replacement.</p>
224     *
225     * This method is a {@code null} safe equivalent to:
226     * <ul>
227     *  <li>{@code pattern.matcher(text).replaceAll(replacement)}</li>
228     * </ul>
229     *
230     * <p>A {@code null} reference passed to this method is a no-op.</p>
231     *
232     * <pre>
233     * StringUtils.replaceAll(null, *, *)       = null
234     * StringUtils.replaceAll("any", (Pattern) null, *)   = "any"
235     * StringUtils.replaceAll("any", *, null)   = "any"
236     * StringUtils.replaceAll("", Pattern.compile(""), "zzz")    = "zzz"
237     * StringUtils.replaceAll("", Pattern.compile(".*"), "zzz")  = "zzz"
238     * StringUtils.replaceAll("", Pattern.compile(".+"), "zzz")  = ""
239     * StringUtils.replaceAll("abc", Pattern.compile(""), "ZZ")  = "ZZaZZbZZcZZ"
240     * StringUtils.replaceAll("&lt;__&gt;\n&lt;__&gt;", Pattern.compile("&lt;.*&gt;"), "z")                 = "z\nz"
241     * StringUtils.replaceAll("&lt;__&gt;\n&lt;__&gt;", Pattern.compile("&lt;.*&gt;", Pattern.DOTALL), "z") = "z"
242     * StringUtils.replaceAll("&lt;__&gt;\n&lt;__&gt;", Pattern.compile("(?s)&lt;.*&gt;"), "z")             = "z"
243     * StringUtils.replaceAll("ABCabc123", Pattern.compile("[a-z]"), "_")       = "ABC___123"
244     * StringUtils.replaceAll("ABCabc123", Pattern.compile("[^A-Z0-9]+"), "_")  = "ABC_123"
245     * StringUtils.replaceAll("ABCabc123", Pattern.compile("[^A-Z0-9]+"), "")   = "ABC123"
246     * StringUtils.replaceAll("Lorem ipsum  dolor   sit", Pattern.compile("( +)([a-z]+)"), "_$2")  = "Lorem_ipsum_dolor_sit"
247     * </pre>
248     *
249     * @param text  text to search and replace in, may be null
250     * @param regex  the regular expression pattern to which this string is to be matched
251     * @param replacement  the string to be substituted for each match
252     * @return  the text with any replacements processed,
253     *              {@code null} if null String input
254     *
255     * @see java.util.regex.Matcher#replaceAll(String)
256     * @see java.util.regex.Pattern
257     */
258    public static String replaceAll(final String text, final Pattern regex, final String replacement) {
259        if (text == null || regex == null || replacement == null) {
260            return text;
261        }
262        return regex.matcher(text).replaceAll(replacement);
263    }
264
265    /**
266     * <p>Replaces each substring of the text String that matches the given regular expression
267     * with the given replacement.</p>
268     *
269     * This method is a {@code null} safe equivalent to:
270     * <ul>
271     *  <li>{@code text.replaceAll(regex, replacement)}</li>
272     *  <li>{@code Pattern.compile(regex).matcher(text).replaceAll(replacement)}</li>
273     * </ul>
274     *
275     * <p>A {@code null} reference passed to this method is a no-op.</p>
276     *
277     * <p>Unlike in the {@link #replacePattern(String, String, String)} method, the {@link Pattern#DOTALL} option
278     * is NOT automatically added.
279     * To use the DOTALL option prepend <code>"(?s)"</code> to the regex.
280     * DOTALL is also known as single-line mode in Perl.</p>
281     *
282     * <pre>
283     * StringUtils.replaceAll(null, *, *)       = null
284     * StringUtils.replaceAll("any", (String) null, *)   = "any"
285     * StringUtils.replaceAll("any", *, null)   = "any"
286     * StringUtils.replaceAll("", "", "zzz")    = "zzz"
287     * StringUtils.replaceAll("", ".*", "zzz")  = "zzz"
288     * StringUtils.replaceAll("", ".+", "zzz")  = ""
289     * StringUtils.replaceAll("abc", "", "ZZ")  = "ZZaZZbZZcZZ"
290     * StringUtils.replaceAll("&lt;__&gt;\n&lt;__&gt;", "&lt;.*&gt;", "z")      = "z\nz"
291     * StringUtils.replaceAll("&lt;__&gt;\n&lt;__&gt;", "(?s)&lt;.*&gt;", "z")  = "z"
292     * StringUtils.replaceAll("ABCabc123", "[a-z]", "_")       = "ABC___123"
293     * StringUtils.replaceAll("ABCabc123", "[^A-Z0-9]+", "_")  = "ABC_123"
294     * StringUtils.replaceAll("ABCabc123", "[^A-Z0-9]+", "")   = "ABC123"
295     * StringUtils.replaceAll("Lorem ipsum  dolor   sit", "( +)([a-z]+)", "_$2")  = "Lorem_ipsum_dolor_sit"
296     * </pre>
297     *
298     * @param text  text to search and replace in, may be null
299     * @param regex  the regular expression to which this string is to be matched
300     * @param replacement  the string to be substituted for each match
301     * @return  the text with any replacements processed,
302     *              {@code null} if null String input
303     *
304     * @throws  java.util.regex.PatternSyntaxException
305     *              if the regular expression's syntax is invalid
306     *
307     * @see #replacePattern(String, String, String)
308     * @see String#replaceAll(String, String)
309     * @see java.util.regex.Pattern
310     * @see java.util.regex.Pattern#DOTALL
311     */
312    public static String replaceAll(final String text, final String regex, final String replacement) {
313        if (text == null || regex == null || replacement == null) {
314            return text;
315        }
316        return text.replaceAll(regex, replacement);
317    }
318
319    /**
320     * <p>Replaces the first substring of the text string that matches the given regular expression pattern
321     * with the given replacement.</p>
322     *
323     * This method is a {@code null} safe equivalent to:
324     * <ul>
325     *  <li>{@code pattern.matcher(text).replaceFirst(replacement)}</li>
326     * </ul>
327     *
328     * <p>A {@code null} reference passed to this method is a no-op.</p>
329     *
330     * <pre>
331     * StringUtils.replaceFirst(null, *, *)       = null
332     * StringUtils.replaceFirst("any", (Pattern) null, *)   = "any"
333     * StringUtils.replaceFirst("any", *, null)   = "any"
334     * StringUtils.replaceFirst("", Pattern.compile(""), "zzz")    = "zzz"
335     * StringUtils.replaceFirst("", Pattern.compile(".*"), "zzz")  = "zzz"
336     * StringUtils.replaceFirst("", Pattern.compile(".+"), "zzz")  = ""
337     * StringUtils.replaceFirst("abc", Pattern.compile(""), "ZZ")  = "ZZabc"
338     * StringUtils.replaceFirst("&lt;__&gt;\n&lt;__&gt;", Pattern.compile("&lt;.*&gt;"), "z")      = "z\n&lt;__&gt;"
339     * StringUtils.replaceFirst("&lt;__&gt;\n&lt;__&gt;", Pattern.compile("(?s)&lt;.*&gt;"), "z")  = "z"
340     * StringUtils.replaceFirst("ABCabc123", Pattern.compile("[a-z]"), "_")          = "ABC_bc123"
341     * StringUtils.replaceFirst("ABCabc123abc", Pattern.compile("[^A-Z0-9]+"), "_")  = "ABC_123abc"
342     * StringUtils.replaceFirst("ABCabc123abc", Pattern.compile("[^A-Z0-9]+"), "")   = "ABC123abc"
343     * StringUtils.replaceFirst("Lorem ipsum  dolor   sit", Pattern.compile("( +)([a-z]+)"), "_$2")  = "Lorem_ipsum  dolor   sit"
344     * </pre>
345     *
346     * @param text  text to search and replace in, may be null
347     * @param regex  the regular expression pattern to which this string is to be matched
348     * @param replacement  the string to be substituted for the first match
349     * @return  the text with the first replacement processed,
350     *              {@code null} if null String input
351     *
352     * @see java.util.regex.Matcher#replaceFirst(String)
353     * @see java.util.regex.Pattern
354     */
355    public static String replaceFirst(final String text, final Pattern regex, final String replacement) {
356        if (text == null || regex == null|| replacement == null ) {
357            return text;
358        }
359        return regex.matcher(text).replaceFirst(replacement);
360    }
361
362    /**
363     * <p>Replaces the first substring of the text string that matches the given regular expression
364     * with the given replacement.</p>
365     *
366     * This method is a {@code null} safe equivalent to:
367     * <ul>
368     *  <li>{@code text.replaceFirst(regex, replacement)}</li>
369     *  <li>{@code Pattern.compile(regex).matcher(text).replaceFirst(replacement)}</li>
370     * </ul>
371     *
372     * <p>A {@code null} reference passed to this method is a no-op.</p>
373     *
374     * <p>The {@link Pattern#DOTALL} option is NOT automatically added.
375     * To use the DOTALL option prepend <code>"(?s)"</code> to the regex.
376     * DOTALL is also known as single-line mode in Perl.</p>
377     *
378     * <pre>
379     * StringUtils.replaceFirst(null, *, *)       = null
380     * StringUtils.replaceFirst("any", (String) null, *)   = "any"
381     * StringUtils.replaceFirst("any", *, null)   = "any"
382     * StringUtils.replaceFirst("", "", "zzz")    = "zzz"
383     * StringUtils.replaceFirst("", ".*", "zzz")  = "zzz"
384     * StringUtils.replaceFirst("", ".+", "zzz")  = ""
385     * StringUtils.replaceFirst("abc", "", "ZZ")  = "ZZabc"
386     * StringUtils.replaceFirst("&lt;__&gt;\n&lt;__&gt;", "&lt;.*&gt;", "z")      = "z\n&lt;__&gt;"
387     * StringUtils.replaceFirst("&lt;__&gt;\n&lt;__&gt;", "(?s)&lt;.*&gt;", "z")  = "z"
388     * StringUtils.replaceFirst("ABCabc123", "[a-z]", "_")          = "ABC_bc123"
389     * StringUtils.replaceFirst("ABCabc123abc", "[^A-Z0-9]+", "_")  = "ABC_123abc"
390     * StringUtils.replaceFirst("ABCabc123abc", "[^A-Z0-9]+", "")   = "ABC123abc"
391     * StringUtils.replaceFirst("Lorem ipsum  dolor   sit", "( +)([a-z]+)", "_$2")  = "Lorem_ipsum  dolor   sit"
392     * </pre>
393     *
394     * @param text  text to search and replace in, may be null
395     * @param regex  the regular expression to which this string is to be matched
396     * @param replacement  the string to be substituted for the first match
397     * @return  the text with the first replacement processed,
398     *              {@code null} if null String input
399     *
400     * @throws  java.util.regex.PatternSyntaxException
401     *              if the regular expression's syntax is invalid
402     *
403     * @see String#replaceFirst(String, String)
404     * @see java.util.regex.Pattern
405     * @see java.util.regex.Pattern#DOTALL
406     */
407    public static String replaceFirst(final String text, final String regex, final String replacement) {
408        if (text == null || regex == null|| replacement == null ) {
409            return text;
410        }
411        return text.replaceFirst(regex, replacement);
412    }
413
414    /**
415     * <p>Replaces each substring of the source String that matches the given regular expression with the given
416     * replacement using the {@link Pattern#DOTALL} option. DOTALL is also known as single-line mode in Perl.</p>
417     *
418     * This call is a {@code null} safe equivalent to:
419     * <ul>
420     * <li>{@code text.replaceAll(&quot;(?s)&quot; + regex, replacement)}</li>
421     * <li>{@code Pattern.compile(regex, Pattern.DOTALL).matcher(text).replaceAll(replacement)}</li>
422     * </ul>
423     *
424     * <p>A {@code null} reference passed to this method is a no-op.</p>
425     *
426     * <pre>
427     * StringUtils.replacePattern(null, *, *)       = null
428     * StringUtils.replacePattern("any", (String) null, *)   = "any"
429     * StringUtils.replacePattern("any", *, null)   = "any"
430     * StringUtils.replacePattern("", "", "zzz")    = "zzz"
431     * StringUtils.replacePattern("", ".*", "zzz")  = "zzz"
432     * StringUtils.replacePattern("", ".+", "zzz")  = ""
433     * StringUtils.replacePattern("&lt;__&gt;\n&lt;__&gt;", "&lt;.*&gt;", "z")       = "z"
434     * StringUtils.replacePattern("ABCabc123", "[a-z]", "_")       = "ABC___123"
435     * StringUtils.replacePattern("ABCabc123", "[^A-Z0-9]+", "_")  = "ABC_123"
436     * StringUtils.replacePattern("ABCabc123", "[^A-Z0-9]+", "")   = "ABC123"
437     * StringUtils.replacePattern("Lorem ipsum  dolor   sit", "( +)([a-z]+)", "_$2")  = "Lorem_ipsum_dolor_sit"
438     * </pre>
439     *
440     * @param text
441     *            the source string
442     * @param regex
443     *            the regular expression to which this string is to be matched
444     * @param replacement
445     *            the string to be substituted for each match
446     * @return The resulting {@code String}
447     * @see #replaceAll(String, String, String)
448     * @see String#replaceAll(String, String)
449     * @see Pattern#DOTALL
450     */
451    public static String replacePattern(final String text, final String regex, final String replacement) {
452        if (text == null || regex == null || replacement == null) {
453            return text;
454        }
455        return Pattern.compile(regex, Pattern.DOTALL).matcher(text).replaceAll(replacement);
456    }
457
458}