001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.lang3;
018
019import java.io.IOException;
020import java.io.Writer;
021
022import org.apache.commons.lang3.text.translate.AggregateTranslator;
023import org.apache.commons.lang3.text.translate.CharSequenceTranslator;
024import org.apache.commons.lang3.text.translate.EntityArrays;
025import org.apache.commons.lang3.text.translate.JavaUnicodeEscaper;
026import org.apache.commons.lang3.text.translate.LookupTranslator;
027import org.apache.commons.lang3.text.translate.NumericEntityEscaper;
028import org.apache.commons.lang3.text.translate.NumericEntityUnescaper;
029import org.apache.commons.lang3.text.translate.OctalUnescaper;
030import org.apache.commons.lang3.text.translate.UnicodeUnescaper;
031import org.apache.commons.lang3.text.translate.UnicodeUnpairedSurrogateRemover;
032
033/**
 * Escapes and unescapes {@link String}s for
 * Java, JavaScript (EcmaScript), JSON, HTML, XML and CSV.
036 *
037 * <p>#ThreadSafe#</p>
038 * @since 2.0
039 * @deprecated As of 3.6, use Apache Commons Text
040 * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/StringEscapeUtils.html">
041 * StringEscapeUtils</a> instead
042 */
043@Deprecated
044public class StringEscapeUtils {
045
046    /* ESCAPE TRANSLATORS */
047
048    private static final class CsvEscaper extends CharSequenceTranslator {
049
050        private static final char CSV_DELIMITER = ',';
051        private static final char CSV_QUOTE = '"';
052        private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
053        private static final char[] CSV_SEARCH_CHARS = { CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF };
054
055        @Override
056        public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
057            if (index != 0) {
058                throw new IllegalStateException("CsvEscaper should never reach the [1] index");
059            }
060            if (StringUtils.containsNone(input.toString(), CSV_SEARCH_CHARS)) {
061                out.write(input.toString());
062            } else {
063                out.write(CSV_QUOTE);
064                out.write(Strings.CS.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR));
065                out.write(CSV_QUOTE);
066            }
067            return Character.codePointCount(input, 0, input.length());
068        }
069    }
070
071    private static final class CsvUnescaper extends CharSequenceTranslator {
072
073        private static final char CSV_DELIMITER = ',';
074        private static final char CSV_QUOTE = '"';
075        private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
076        private static final char[] CSV_SEARCH_CHARS = {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};
077
078        @Override
079        public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
080            if (index != 0) {
081                throw new IllegalStateException("CsvUnescaper should never reach the [1] index");
082            }
083            if (input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE) {
084                out.write(input.toString());
085                return Character.codePointCount(input, 0, input.length());
086            }
087            // strip quotes
088            final String quoteless = input.subSequence(1, input.length() - 1).toString();
089            if (StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS)) {
090                // deal with escaped quotes; ie) ""
091                out.write(Strings.CS.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR));
092            } else {
093                out.write(input.toString());
094            }
095            return Character.codePointCount(input, 0, input.length());
096        }
097    }
098
099    /**
100     * Translator object for escaping Java.
101     *
102     * While {@link #escapeJava(String)} is the expected method of use, this
103     * object allows the Java escaping functionality to be used
104     * as the foundation for a custom translator.
105     *
106     * @since 3.0
107     */
108    public static final CharSequenceTranslator ESCAPE_JAVA =
109          new LookupTranslator(
110            new String[][] {
111              {"\"", "\\\""},
112              {"\\", "\\\\"},
113          }).with(
114            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE())
115          ).with(
116            JavaUnicodeEscaper.outsideOf(32, 0x7f)
117        );
118
    /**
     * Translator object for escaping EcmaScript/JavaScript.
     *
     * <p>While {@link #escapeEcmaScript(String)} is the expected method of use, this
     * object allows the EcmaScript escaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>It aggregates a lookup table that backslash-escapes the single quote, double quote,
     * backslash and forward slash, the {@code EntityArrays.JAVA_CTRL_CHARS_ESCAPE()} table, and
     * {@code JavaUnicodeEscaper.outsideOf(32, 0x7f)}.</p>
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator ESCAPE_ECMASCRIPT =
        new AggregateTranslator(
            new LookupTranslator(
                      new String[][] {
                            {"'", "\\'"},
                            {"\"", "\\\""},
                            {"\\", "\\\\"},
                            {"/", "\\/"}
                      }),
            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
            JavaUnicodeEscaper.outsideOf(32, 0x7f)
        );
140
    /**
     * Translator object for escaping Json.
     *
     * <p>While {@link #escapeJson(String)} is the expected method of use, this
     * object allows the Json escaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>It aggregates a lookup table that backslash-escapes the double quote, backslash and
     * forward slash, the {@code EntityArrays.JAVA_CTRL_CHARS_ESCAPE()} table, and
     * {@code JavaUnicodeEscaper.outsideOf(32, 0x7f)}.</p>
     *
     * @since 3.2
     */
    public static final CharSequenceTranslator ESCAPE_JSON =
        new AggregateTranslator(
            new LookupTranslator(
                      new String[][] {
                            {"\"", "\\\""},
                            {"\\", "\\\\"},
                            {"/", "\\/"}
                      }),
            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
            JavaUnicodeEscaper.outsideOf(32, 0x7f)
        );
161
    /**
     * Translator object for escaping XML.
     *
     * <p>While {@link #escapeXml(String)} is the expected method of use, this
     * object allows the XML escaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>It aggregates the {@code EntityArrays.BASIC_ESCAPE()} and
     * {@code EntityArrays.APOS_ESCAPE()} lookup tables only.</p>
     *
     * @since 3.0
     * @deprecated use {@link #ESCAPE_XML10} or {@link #ESCAPE_XML11} instead.
     */
    @Deprecated
    public static final CharSequenceTranslator ESCAPE_XML =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
            new LookupTranslator(EntityArrays.APOS_ESCAPE())
        );
178
    /**
     * Translator object for escaping XML 1.0.
     *
     * <p>While {@link #escapeXml10(String)} is the expected method of use, this
     * object allows the XML escaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>It aggregates:</p>
     * <ul>
     * <li>the {@code EntityArrays.BASIC_ESCAPE()} and {@code EntityArrays.APOS_ESCAPE()} lookup tables;</li>
     * <li>a lookup table mapping {@code \u005Cu0000}-{@code \u005Cu0008}, {@code \u005Cu000b},
     * {@code \u005Cu000c}, {@code \u005Cu000e}-{@code \u005Cu001f}, {@code \u005Cufffe} and
     * {@code \u005Cuffff} to {@code StringUtils.EMPTY};</li>
     * <li>{@code NumericEntityEscaper} instances for the ranges {@code 0x7f}-{@code 0x84} and
     * {@code 0x86}-{@code 0x9f};</li>
     * <li>a {@code UnicodeUnpairedSurrogateRemover}.</li>
     * </ul>
     *
     * @since 3.3
     */
    public static final CharSequenceTranslator ESCAPE_XML10 =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
            new LookupTranslator(EntityArrays.APOS_ESCAPE()),
            // Tab (0x9), LF (0xa) and CR (0xd) are intentionally absent from this table.
            new LookupTranslator(
                    new String[][] {
                            { "\u0000", StringUtils.EMPTY },
                            { "\u0001", StringUtils.EMPTY },
                            { "\u0002", StringUtils.EMPTY },
                            { "\u0003", StringUtils.EMPTY },
                            { "\u0004", StringUtils.EMPTY },
                            { "\u0005", StringUtils.EMPTY },
                            { "\u0006", StringUtils.EMPTY },
                            { "\u0007", StringUtils.EMPTY },
                            { "\u0008", StringUtils.EMPTY },
                            { "\u000b", StringUtils.EMPTY },
                            { "\u000c", StringUtils.EMPTY },
                            { "\u000e", StringUtils.EMPTY },
                            { "\u000f", StringUtils.EMPTY },
                            { "\u0010", StringUtils.EMPTY },
                            { "\u0011", StringUtils.EMPTY },
                            { "\u0012", StringUtils.EMPTY },
                            { "\u0013", StringUtils.EMPTY },
                            { "\u0014", StringUtils.EMPTY },
                            { "\u0015", StringUtils.EMPTY },
                            { "\u0016", StringUtils.EMPTY },
                            { "\u0017", StringUtils.EMPTY },
                            { "\u0018", StringUtils.EMPTY },
                            { "\u0019", StringUtils.EMPTY },
                            { "\u001a", StringUtils.EMPTY },
                            { "\u001b", StringUtils.EMPTY },
                            { "\u001c", StringUtils.EMPTY },
                            { "\u001d", StringUtils.EMPTY },
                            { "\u001e", StringUtils.EMPTY },
                            { "\u001f", StringUtils.EMPTY },
                            { "\ufffe", StringUtils.EMPTY },
                            { "\uffff", StringUtils.EMPTY }
                    }),
            NumericEntityEscaper.between(0x7f, 0x84),
            NumericEntityEscaper.between(0x86, 0x9f),
            new UnicodeUnpairedSurrogateRemover()
        );
230
    /**
     * Translator object for escaping XML 1.1.
     *
     * <p>While {@link #escapeXml11(String)} is the expected method of use, this
     * object allows the XML escaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>It aggregates:</p>
     * <ul>
     * <li>the {@code EntityArrays.BASIC_ESCAPE()} and {@code EntityArrays.APOS_ESCAPE()} lookup tables;</li>
     * <li>a lookup table mapping {@code \u005Cu0000}, {@code \u005Cufffe} and {@code \u005Cuffff} to
     * {@code StringUtils.EMPTY}, and {@code \u005Cu000b} / {@code \u005Cu000c} to the numeric
     * entities {@code &#11;} / {@code &#12;};</li>
     * <li>{@code NumericEntityEscaper} instances for the ranges {@code 0x1}-{@code 0x8},
     * {@code 0xe}-{@code 0x1f}, {@code 0x7f}-{@code 0x84} and {@code 0x86}-{@code 0x9f};</li>
     * <li>a {@code UnicodeUnpairedSurrogateRemover}.</li>
     * </ul>
     *
     * @since 3.3
     */
    public static final CharSequenceTranslator ESCAPE_XML11 =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
            new LookupTranslator(EntityArrays.APOS_ESCAPE()),
            new LookupTranslator(
                    new String[][] {
                            { "\u0000", StringUtils.EMPTY },
                            { "\u000b", "&#11;" },
                            { "\u000c", "&#12;" },
                            { "\ufffe", StringUtils.EMPTY },
                            { "\uffff", StringUtils.EMPTY }
                    }),
            NumericEntityEscaper.between(0x1, 0x8),
            NumericEntityEscaper.between(0xe, 0x1f),
            NumericEntityEscaper.between(0x7f, 0x84),
            NumericEntityEscaper.between(0x86, 0x9f),
            new UnicodeUnpairedSurrogateRemover()
        );
258
    /**
     * Translator object for escaping HTML version 3.0.
     *
     * <p>While {@link #escapeHtml3(String)} is the expected method of use, this
     * object allows the HTML escaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>It aggregates the {@code EntityArrays.BASIC_ESCAPE()} and
     * {@code EntityArrays.ISO8859_1_ESCAPE()} lookup tables.</p>
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator ESCAPE_HTML3 =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
            new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE())
        );
273
    /**
     * Translator object for escaping HTML version 4.0.
     *
     * <p>While {@link #escapeHtml4(String)} is the expected method of use, this
     * object allows the HTML escaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>It aggregates the {@code EntityArrays.BASIC_ESCAPE()},
     * {@code EntityArrays.ISO8859_1_ESCAPE()} and
     * {@code EntityArrays.HTML40_EXTENDED_ESCAPE()} lookup tables.</p>
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator ESCAPE_HTML4 =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
            new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()),
            new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE())
        );
289
290    /* UNESCAPE TRANSLATORS */
291
    /**
     * Translator object for escaping individual Comma Separated Values.
     *
     * <p>While {@link #escapeCsv(String)} is the expected method of use, this
     * object allows the CSV escaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>The translator handles one whole column value per call and throws an
     * {@link IllegalStateException} if asked to translate from any index other than {@code 0}.</p>
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper();
302
    /**
     * Translator object for unescaping escaped Java.
     *
     * <p>While {@link #unescapeJava(String)} is the expected method of use, this
     * object allows the Java unescaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>It aggregates an {@code OctalUnescaper}, a {@code UnicodeUnescaper}, the
     * {@code EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()} lookup table, and a lookup table for the
     * escaped backslash, double quote and single quote plus a lone backslash.</p>
     *
     * @since 3.0
     */
    // TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)?
    public static final CharSequenceTranslator UNESCAPE_JAVA =
        new AggregateTranslator(
            new OctalUnescaper(),     // .between('\1', '\377'),
            new UnicodeUnescaper(),
            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()),
            new LookupTranslator(
                      new String[][] {
                            {"\\\\", "\\"},
                            {"\\\"", "\""},
                            {"\\'", "'"},
                            // a single backslash with no recognised escape after it maps to the empty string
                            {"\\", ""}
                      })
        );
326
    /**
     * Translator object for unescaping escaped EcmaScript.
     *
     * <p>While {@link #unescapeEcmaScript(String)} is the expected method of use, this
     * object allows the EcmaScript unescaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>This is the same translator instance as {@link #UNESCAPE_JAVA}.</p>
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA;
337
    /**
     * Translator object for unescaping escaped Json.
     *
     * <p>While {@link #unescapeJson(String)} is the expected method of use, this
     * object allows the Json unescaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>This is the same translator instance as {@link #UNESCAPE_JAVA}.</p>
     *
     * @since 3.2
     */
    public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA;
348
    /**
     * Translator object for unescaping escaped HTML 3.0.
     *
     * <p>While {@link #unescapeHtml3(String)} is the expected method of use, this
     * object allows the HTML unescaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>It aggregates the {@code EntityArrays.BASIC_UNESCAPE()} and
     * {@code EntityArrays.ISO8859_1_UNESCAPE()} lookup tables and a
     * {@code NumericEntityUnescaper}.</p>
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator UNESCAPE_HTML3 =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
            new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
            new NumericEntityUnescaper()
        );
364
    /**
     * Translator object for unescaping escaped HTML 4.0.
     *
     * <p>While {@link #unescapeHtml4(String)} is the expected method of use, this
     * object allows the HTML unescaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>It aggregates the {@code EntityArrays.BASIC_UNESCAPE()},
     * {@code EntityArrays.ISO8859_1_UNESCAPE()} and
     * {@code EntityArrays.HTML40_EXTENDED_UNESCAPE()} lookup tables and a
     * {@code NumericEntityUnescaper}.</p>
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator UNESCAPE_HTML4 =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
            new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
            new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()),
            new NumericEntityUnescaper()
        );
381
    /**
     * Translator object for unescaping escaped XML.
     *
     * <p>While {@link #unescapeXml(String)} is the expected method of use, this
     * object allows the XML unescaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>It aggregates the {@code EntityArrays.BASIC_UNESCAPE()} and
     * {@code EntityArrays.APOS_UNESCAPE()} lookup tables and a
     * {@code NumericEntityUnescaper}.</p>
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator UNESCAPE_XML =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
            new LookupTranslator(EntityArrays.APOS_UNESCAPE()),
            new NumericEntityUnescaper()
        );
397
    /**
     * Translator object for unescaping escaped Comma Separated Value entries.
     *
     * <p>While {@link #unescapeCsv(String)} is the expected method of use, this
     * object allows the CSV unescaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>The translator handles one whole column value per call and throws an
     * {@link IllegalStateException} if asked to translate from any index other than {@code 0}.</p>
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper();
408
409    /* Helper functions */
410
    /**
     * Returns a {@link String} value for a CSV column enclosed in double quotes,
     * if required.
     *
     * <p>If the value contains a comma, carriage return, line feed or double quote, then the
     *    String value is returned enclosed in double quotes.</p>
     *
     * <p>Any double quote characters in the value are escaped with another double quote.</p>
     *
     * <p>If the value does not contain a comma, carriage return, line feed or double quote, then the
     *    String value is returned unchanged.</p>
     *
     * <p>See <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
     * <a href="https://datatracker.ietf.org/doc/html/rfc4180">RFC 4180</a>.</p>
     *
     * <p>Delegates to {@link #ESCAPE_CSV}.</p>
     *
     * @param input the input CSV column String, may be null
     * @return the input String, enclosed in double quotes if the value contains a comma,
     * newline or double quote, {@code null} if null string input
     * @since 2.4
     */
    public static final String escapeCsv(final String input) {
        return ESCAPE_CSV.translate(input);
    }
434
    /**
     * Escapes the characters in a {@link String} using EcmaScript String rules.
     *
     * <p>Escapes any values it finds into their EcmaScript String form.
     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.).</p>
     *
     * <p>So a tab becomes the characters {@code '\\'} and
     * {@code 't'}.</p>
     *
     * <p>The only difference between Java strings and EcmaScript strings
     * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p>
     *
     * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects.</p>
     *
     * <p>Example:</p>
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn\'t say, \"Stop!\"
     * </pre>
     *
     * <p>Delegates to {@link #ESCAPE_ECMASCRIPT}.</p>
     *
     * @param input  String to escape values in, may be null
     * @return String with escaped values, {@code null} if null string input
     * @since 3.0
     */
    public static final String escapeEcmaScript(final String input) {
        return ESCAPE_ECMASCRIPT.translate(input);
    }
461
    /**
     * Escapes the characters in a {@link String} using HTML entities.
     *
     * <p>Supports only the HTML 3.0 entities.</p>
     *
     * <p>Delegates to {@link #ESCAPE_HTML3}.</p>
     *
     * @param input  the {@link String} to escape, may be null
     * @return a new escaped {@link String}, {@code null} if null string input
     * @since 3.0
     */
    public static final String escapeHtml3(final String input) {
        return ESCAPE_HTML3.translate(input);
    }
473
    /**
     * Escapes the characters in a {@link String} using HTML entities.
     *
     * <p>
     * For example:
     * </p>
     * <p>{@code "bread" & "butter"}</p>
     * becomes:
     * <p>
     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
     * </p>
     *
     * <p>Supports all known HTML 4.0 entities, including funky accents.
     * Note that the commonly used apostrophe escape character (&amp;apos;)
     * is not a legal entity and so is not supported.</p>
     *
     * <p>Delegates to {@link #ESCAPE_HTML4}.</p>
     *
     * @param input  the {@link String} to escape, may be null
     * @return a new escaped {@link String}, {@code null} if null string input
     * @see <a href="https://web.archive.org/web/20060225074150/https://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
     * @see <a href="https://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
     * @see <a href="https://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
     * @see <a href="https://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
     * @see <a href="https://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
     * @since 3.0
     */
    public static final String escapeHtml4(final String input) {
        return ESCAPE_HTML4.translate(input);
    }
502
    /**
     * Escapes the characters in a {@link String} using Java String rules.
     *
     * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.).</p>
     *
     * <p>So a tab becomes the characters {@code '\\'} and
     * {@code 't'}.</p>
     *
     * <p>The only difference between Java strings and JavaScript strings
     * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p>
     *
     * <p>Example:</p>
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn't say, \"Stop!\"
     * </pre>
     *
     * <p>Delegates to {@link #ESCAPE_JAVA}.</p>
     *
     * @param input  String to escape values in, may be null
     * @return String with escaped values, {@code null} if null string input
     */
    public static final String escapeJava(final String input) {
        return ESCAPE_JAVA.translate(input);
    }
526
    /**
     * Escapes the characters in a {@link String} using Json String rules.
     *
     * <p>Escapes any values it finds into their Json String form.
     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.).</p>
     *
     * <p>So a tab becomes the characters {@code '\\'} and
     * {@code 't'}.</p>
     *
     * <p>The only difference between Java strings and Json strings
     * is that in Json, forward-slash (/) is escaped.</p>
     *
     * <p>See <a href="https://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a> for further details.</p>
     *
     * <p>Example:</p>
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn't say, \"Stop!\"
     * </pre>
     *
     * <p>Delegates to {@link #ESCAPE_JSON}.</p>
     *
     * @param input  String to escape values in, may be null
     * @return String with escaped values, {@code null} if null string input
     * @since 3.2
     */
    public static final String escapeJson(final String input) {
        return ESCAPE_JSON.translate(input);
    }
553
    /**
     * Escapes the characters in a {@link String} using XML entities.
     *
     * <p>For example: {@code "bread" & "butter"} =&gt;
     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
     * </p>
     *
     * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
     * Does not support DTDs or external entities.</p>
     *
     * <p>Note that Unicode characters greater than 0x7f are, as of 3.0, no longer
     *    escaped. If you still wish this functionality, you can achieve it
     *    via the following:
     * {@code StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE));}</p>
     *
     * <p>Delegates to {@link #ESCAPE_XML}.</p>
     *
     * @param input  the {@link String} to escape, may be null
     * @return a new escaped {@link String}, {@code null} if null string input
     * @see #unescapeXml(String)
     * @deprecated use {@link #escapeXml10(java.lang.String)} or {@link #escapeXml11(java.lang.String)} instead.
     */
    @Deprecated
    public static final String escapeXml(final String input) {
        return ESCAPE_XML.translate(input);
    }
578
    /**
     * Escapes the characters in a {@link String} using XML entities.
     * <p>
     * For example:
     * </p>
     *
     * <pre>{@code
     * "bread" & "butter"
     * }</pre>
     * <p>
     * converts to:
     * </p>
     *
     * <pre>
     * {@code
     * &quot;bread&quot; &amp; &quot;butter&quot;
     * }
     * </pre>
     *
     * <p>
     * Note that XML 1.0 is a text-only format: it cannot represent control characters or unpaired Unicode surrogate code points, even after escaping. The
     * method {@code escapeXml10} will remove characters that do not fit in the following ranges:
     * </p>
     *
     * <p>
     * {@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}
     * </p>
     *
     * <p>
     * Though not strictly necessary, {@code escapeXml10} will escape characters in the following ranges:
     * </p>
     *
     * <p>
     * {@code [#x7F-#x84] | [#x86-#x9F]}
     * </p>
     *
     * <p>
     * The returned string can be inserted into a valid XML 1.0 or XML 1.1 document. If you want to allow more non-text characters in an XML 1.1 document, use
     * {@link #escapeXml11(String)}.
     * </p>
     *
     * <p>
     * Delegates to {@link #ESCAPE_XML10}.
     * </p>
     *
     * @param input the {@link String} to escape, may be null
     * @return a new escaped {@link String}, {@code null} if null string input
     * @see #unescapeXml(String)
     * @since 3.3
     */
    public static String escapeXml10(final String input) {
        return ESCAPE_XML10.translate(input);
    }
628
    /**
     * Escapes the characters in a {@link String} using XML entities.
     *
     * <p>For example: {@code "bread" & "butter"} =&gt;
     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
     * </p>
     *
     * <p>XML 1.1 can represent certain control characters, but it cannot represent
     * the null byte or unpaired Unicode surrogate code points, even after escaping.
     * {@code escapeXml11} will remove characters that do not fit in the following
     * ranges:</p>
     *
     * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
     *
     * <p>{@code escapeXml11} will escape characters in the following ranges:</p>
     *
     * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p>
     *
     * <p>The returned string can be inserted into a valid XML 1.1 document. Do not
     * use it for XML 1.0 documents.</p>
     *
     * <p>Delegates to {@link #ESCAPE_XML11}.</p>
     *
     * @param input  the {@link String} to escape, may be null
     * @return a new escaped {@link String}, {@code null} if null string input
     * @see #unescapeXml(String)
     * @since 3.3
     */
    public static String escapeXml11(final String input) {
        return ESCAPE_XML11.translate(input);
    }
658
    /**
     * Returns a {@link String} value for an unescaped CSV column.
     *
     * <p>If the value is enclosed in double quotes, and contains a comma, carriage return,
     *    line feed or double quote, then the enclosing quotes are removed.
     * </p>
     *
     * <p>Any double quote escaped characters (a pair of double quotes) are unescaped
     *    to just one double quote.</p>
     *
     * <p>If the value is not enclosed in double quotes, or is and does not contain a
     *    comma, carriage return, line feed or double quote, then the String value is
     *    returned unchanged.</p>
     *
     * <p>See <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
     * <a href="https://datatracker.ietf.org/doc/html/rfc4180">RFC 4180</a>.</p>
     *
     * <p>Delegates to {@link #UNESCAPE_CSV}.</p>
     *
     * @param input the input CSV column String, may be null
     * @return the input String, with enclosing double quotes removed and embedded double
     * quotes unescaped, {@code null} if null string input
     * @since 2.4
     */
    public static final String unescapeCsv(final String input) {
        return UNESCAPE_CSV.translate(input);
    }
683
    /**
     * Unescapes any EcmaScript literals found in the {@link String}.
     *
     * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
     * into a newline character, unless the {@code '\'} is preceded by another
     * {@code '\'}.</p>
     *
     * <p>Delegates to {@link #UNESCAPE_ECMASCRIPT}, which is the same translator as
     * {@link #UNESCAPE_JAVA}.</p>
     *
     * @see #unescapeJava(String)
     * @param input  the {@link String} to unescape, may be null
     * @return A new unescaped {@link String}, {@code null} if null string input
     * @since 3.0
     */
    public static final String unescapeEcmaScript(final String input) {
        return UNESCAPE_ECMASCRIPT.translate(input);
    }
699
    /**
     * Unescapes a string containing entity escapes to a string
     * containing the actual Unicode characters corresponding to the
     * escapes. Supports only HTML 3.0 entities.
     *
     * <p>Delegates to {@link #UNESCAPE_HTML3}.</p>
     *
     * @param input  the {@link String} to unescape, may be null
     * @return a new unescaped {@link String}, {@code null} if null string input
     * @since 3.0
     */
    public static final String unescapeHtml3(final String input) {
        return UNESCAPE_HTML3.translate(input);
    }
712
    /**
     * Unescapes a string containing entity escapes to a string
     * containing the actual Unicode characters corresponding to the
     * escapes. Supports HTML 4.0 entities.
     *
     * <p>For example, the string {@code "&lt;Fran&ccedil;ais&gt;"}
     * will become {@code "<Français>"}</p>
     *
     * <p>If an entity is unrecognized, it is left alone, and inserted
     * verbatim into the result string. e.g. {@code "&gt;&zzzz;x"} will
     * become {@code ">&zzzz;x"}.</p>
     *
     * <p>Delegates to {@link #UNESCAPE_HTML4}.</p>
     *
     * @param input  the {@link String} to unescape, may be null
     * @return a new unescaped {@link String}, {@code null} if null string input
     * @since 3.0
     */
    public static final String unescapeHtml4(final String input) {
        return UNESCAPE_HTML4.translate(input);
    }
732
    /**
     * Unescapes any Java literals found in the {@link String}.
     *
     * <p>For example, it will turn a sequence of {@code '\'} and
     * {@code 'n'} into a newline character, unless the {@code '\'}
     * is preceded by another {@code '\'}.</p>
     *
     * <p>Delegates to {@link #UNESCAPE_JAVA}.</p>
     *
     * @param input  the {@link String} to unescape, may be null
     * @return a new unescaped {@link String}, {@code null} if null string input
     */
    public static final String unescapeJava(final String input) {
        return UNESCAPE_JAVA.translate(input);
    }
745
    /**
     * Unescapes any Json literals found in the {@link String}.
     *
     * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
     * into a newline character, unless the {@code '\'} is preceded by another
     * {@code '\'}.</p>
     *
     * <p>Delegates to {@link #UNESCAPE_JSON}, which is the same translator as
     * {@link #UNESCAPE_JAVA}.</p>
     *
     * @see #unescapeJava(String)
     * @param input  the {@link String} to unescape, may be null
     * @return A new unescaped {@link String}, {@code null} if null string input
     * @since 3.2
     */
    public static final String unescapeJson(final String input) {
        return UNESCAPE_JSON.translate(input);
    }
761
    /**
     * Unescapes a string containing XML entity escapes to a string
     * containing the actual Unicode characters corresponding to the
     * escapes.
     *
     * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
     * Does not support DTDs or external entities.</p>
     *
     * <p>Note that numeric entities are also unescaped to their respective
     *    Unicode characters, because a {@code NumericEntityUnescaper} is part of
     *    {@link #UNESCAPE_XML}. This may change in future releases.</p>
     *
     * <p>Delegates to {@link #UNESCAPE_XML}.</p>
     *
     * @param input  the {@link String} to unescape, may be null
     * @return a new unescaped {@link String}, {@code null} if null string input
     * @see #escapeXml(String)
     * @see #escapeXml10(String)
     * @see #escapeXml11(String)
     */
    public static final String unescapeXml(final String input) {
        return UNESCAPE_XML.translate(input);
    }
782
    /**
     * {@link StringEscapeUtils} instances should NOT be constructed in
     * standard programming.
     *
     * <p>Instead, the class should be used through its static members, for example:</p>
     * <pre>StringEscapeUtils.escapeJava("foo");</pre>
     *
     * <p>This constructor is public to permit tools that require a JavaBean
     * instance to operate.</p>
     *
     * @deprecated TODO Make private in 4.0.
     */
    @Deprecated
    public StringEscapeUtils() {
        // empty: there is no instance state to initialise
    }
799
800}