StringEscapeUtils.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      https://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.lang3;

  18. import java.io.IOException;
  19. import java.io.Writer;

  20. import org.apache.commons.lang3.text.translate.AggregateTranslator;
  21. import org.apache.commons.lang3.text.translate.CharSequenceTranslator;
  22. import org.apache.commons.lang3.text.translate.EntityArrays;
  23. import org.apache.commons.lang3.text.translate.JavaUnicodeEscaper;
  24. import org.apache.commons.lang3.text.translate.LookupTranslator;
  25. import org.apache.commons.lang3.text.translate.NumericEntityEscaper;
  26. import org.apache.commons.lang3.text.translate.NumericEntityUnescaper;
  27. import org.apache.commons.lang3.text.translate.OctalUnescaper;
  28. import org.apache.commons.lang3.text.translate.UnicodeUnescaper;
  29. import org.apache.commons.lang3.text.translate.UnicodeUnpairedSurrogateRemover;

  30. /**
  31.  * Escapes and unescapes {@link String}s for
  32.  * Java, JavaScript, JSON, CSV, HTML and XML.
  33.  *
  34.  * <p>#ThreadSafe#</p>
  35.  * @since 2.0
  36.  * @deprecated As of 3.6, use Apache Commons Text
  37.  * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/StringEscapeUtils.html">
  38.  * StringEscapeUtils</a> instead
  39.  */
  40. @Deprecated
  41. public class StringEscapeUtils {

  42.     /* ESCAPE TRANSLATORS */

  43.     private static final class CsvEscaper extends CharSequenceTranslator {

  44.         private static final char CSV_DELIMITER = ',';
  45.         private static final char CSV_QUOTE = '"';
  46.         private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
  47.         private static final char[] CSV_SEARCH_CHARS = { CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF };

  48.         @Override
  49.         public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
  50.             if (index != 0) {
  51.                 throw new IllegalStateException("CsvEscaper should never reach the [1] index");
  52.             }
  53.             if (StringUtils.containsNone(input.toString(), CSV_SEARCH_CHARS)) {
  54.                 out.write(input.toString());
  55.             } else {
  56.                 out.write(CSV_QUOTE);
  57.                 out.write(Strings.CS.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR));
  58.                 out.write(CSV_QUOTE);
  59.             }
  60.             return Character.codePointCount(input, 0, input.length());
  61.         }
  62.     }

  63.     private static final class CsvUnescaper extends CharSequenceTranslator {

  64.         private static final char CSV_DELIMITER = ',';
  65.         private static final char CSV_QUOTE = '"';
  66.         private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
  67.         private static final char[] CSV_SEARCH_CHARS = {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};

  68.         @Override
  69.         public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
  70.             if (index != 0) {
  71.                 throw new IllegalStateException("CsvUnescaper should never reach the [1] index");
  72.             }
  73.             if (input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE) {
  74.                 out.write(input.toString());
  75.                 return Character.codePointCount(input, 0, input.length());
  76.             }
  77.             // strip quotes
  78.             final String quoteless = input.subSequence(1, input.length() - 1).toString();
  79.             if (StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS)) {
  80.                 // deal with escaped quotes; ie) ""
  81.                 out.write(Strings.CS.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR));
  82.             } else {
  83.                 out.write(input.toString());
  84.             }
  85.             return Character.codePointCount(input, 0, input.length());
  86.         }
  87.     }

  88.     /**
  89.      * Translator object for escaping Java.
  90.      *
  91.      * While {@link #escapeJava(String)} is the expected method of use, this
  92.      * object allows the Java escaping functionality to be used
  93.      * as the foundation for a custom translator.
  94.      *
  95.      * @since 3.0
  96.      */
  97.     public static final CharSequenceTranslator ESCAPE_JAVA =
  98.           new LookupTranslator(
  99.             new String[][] {
  100.               {"\"", "\\\""},
  101.               {"\\", "\\\\"},
  102.           }).with(
  103.             new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE())
  104.           ).with(
  105.             JavaUnicodeEscaper.outsideOf(32, 0x7f)
  106.         );

  107.     /**
  108.      * Translator object for escaping EcmaScript/JavaScript.
  109.      *
  110.      * While {@link #escapeEcmaScript(String)} is the expected method of use, this
  111.      * object allows the EcmaScript escaping functionality to be used
  112.      * as the foundation for a custom translator.
  113.      *
  114.      * @since 3.0
  115.      */
  116.     public static final CharSequenceTranslator ESCAPE_ECMASCRIPT =
  117.         new AggregateTranslator(
  118.             new LookupTranslator(
  119.                       new String[][] {
  120.                             {"'", "\\'"},
  121.                             {"\"", "\\\""},
  122.                             {"\\", "\\\\"},
  123.                             {"/", "\\/"}
  124.                       }),
  125.             new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
  126.             JavaUnicodeEscaper.outsideOf(32, 0x7f)
  127.         );

  128.     /**
  129.      * Translator object for escaping Json.
  130.      *
  131.      * While {@link #escapeJson(String)} is the expected method of use, this
  132.      * object allows the Json escaping functionality to be used
  133.      * as the foundation for a custom translator.
  134.      *
  135.      * @since 3.2
  136.      */
  137.     public static final CharSequenceTranslator ESCAPE_JSON =
  138.         new AggregateTranslator(
  139.             new LookupTranslator(
  140.                       new String[][] {
  141.                             {"\"", "\\\""},
  142.                             {"\\", "\\\\"},
  143.                             {"/", "\\/"}
  144.                       }),
  145.             new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
  146.             JavaUnicodeEscaper.outsideOf(32, 0x7f)
  147.         );

  148.     /**
  149.      * Translator object for escaping XML.
  150.      *
  151.      * While {@link #escapeXml(String)} is the expected method of use, this
  152.      * object allows the XML escaping functionality to be used
  153.      * as the foundation for a custom translator.
  154.      *
  155.      * @since 3.0
  156.      * @deprecated use {@link #ESCAPE_XML10} or {@link #ESCAPE_XML11} instead.
  157.      */
  158.     @Deprecated
  159.     public static final CharSequenceTranslator ESCAPE_XML =
  160.         new AggregateTranslator(
  161.             new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
  162.             new LookupTranslator(EntityArrays.APOS_ESCAPE())
  163.         );

  164.     /**
  165.      * Translator object for escaping XML 1.0.
  166.      *
  167.      * While {@link #escapeXml10(String)} is the expected method of use, this
  168.      * object allows the XML escaping functionality to be used
  169.      * as the foundation for a custom translator.
  170.      *
  171.      * @since 3.3
  172.      */
  173.     public static final CharSequenceTranslator ESCAPE_XML10 =
  174.         new AggregateTranslator(
  175.             new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
  176.             new LookupTranslator(EntityArrays.APOS_ESCAPE()),
  177.             new LookupTranslator(
  178.                     new String[][] {
  179.                             { "\u0000", StringUtils.EMPTY },
  180.                             { "\u0001", StringUtils.EMPTY },
  181.                             { "\u0002", StringUtils.EMPTY },
  182.                             { "\u0003", StringUtils.EMPTY },
  183.                             { "\u0004", StringUtils.EMPTY },
  184.                             { "\u0005", StringUtils.EMPTY },
  185.                             { "\u0006", StringUtils.EMPTY },
  186.                             { "\u0007", StringUtils.EMPTY },
  187.                             { "\u0008", StringUtils.EMPTY },
  188.                             { "\u000b", StringUtils.EMPTY },
  189.                             { "\u000c", StringUtils.EMPTY },
  190.                             { "\u000e", StringUtils.EMPTY },
  191.                             { "\u000f", StringUtils.EMPTY },
  192.                             { "\u0010", StringUtils.EMPTY },
  193.                             { "\u0011", StringUtils.EMPTY },
  194.                             { "\u0012", StringUtils.EMPTY },
  195.                             { "\u0013", StringUtils.EMPTY },
  196.                             { "\u0014", StringUtils.EMPTY },
  197.                             { "\u0015", StringUtils.EMPTY },
  198.                             { "\u0016", StringUtils.EMPTY },
  199.                             { "\u0017", StringUtils.EMPTY },
  200.                             { "\u0018", StringUtils.EMPTY },
  201.                             { "\u0019", StringUtils.EMPTY },
  202.                             { "\u001a", StringUtils.EMPTY },
  203.                             { "\u001b", StringUtils.EMPTY },
  204.                             { "\u001c", StringUtils.EMPTY },
  205.                             { "\u001d", StringUtils.EMPTY },
  206.                             { "\u001e", StringUtils.EMPTY },
  207.                             { "\u001f", StringUtils.EMPTY },
  208.                             { "\ufffe", StringUtils.EMPTY },
  209.                             { "\uffff", StringUtils.EMPTY }
  210.                     }),
  211.             NumericEntityEscaper.between(0x7f, 0x84),
  212.             NumericEntityEscaper.between(0x86, 0x9f),
  213.             new UnicodeUnpairedSurrogateRemover()
  214.         );

  215.     /**
  216.      * Translator object for escaping XML 1.1.
  217.      *
  218.      * While {@link #escapeXml11(String)} is the expected method of use, this
  219.      * object allows the XML escaping functionality to be used
  220.      * as the foundation for a custom translator.
  221.      *
  222.      * @since 3.3
  223.      */
  224.     public static final CharSequenceTranslator ESCAPE_XML11 =
  225.         new AggregateTranslator(
  226.             new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
  227.             new LookupTranslator(EntityArrays.APOS_ESCAPE()),
  228.             new LookupTranslator(
  229.                     new String[][] {
  230.                             { "\u0000", StringUtils.EMPTY },
  231.                             { "\u000b", "&#11;" },
  232.                             { "\u000c", "&#12;" },
  233.                             { "\ufffe", StringUtils.EMPTY },
  234.                             { "\uffff", StringUtils.EMPTY }
  235.                     }),
  236.             NumericEntityEscaper.between(0x1, 0x8),
  237.             NumericEntityEscaper.between(0xe, 0x1f),
  238.             NumericEntityEscaper.between(0x7f, 0x84),
  239.             NumericEntityEscaper.between(0x86, 0x9f),
  240.             new UnicodeUnpairedSurrogateRemover()
  241.         );

  242.     /**
  243.      * Translator object for escaping HTML version 3.0.
  244.      *
  245.      * While {@link #escapeHtml3(String)} is the expected method of use, this
  246.      * object allows the HTML escaping functionality to be used
  247.      * as the foundation for a custom translator.
  248.      *
  249.      * @since 3.0
  250.      */
  251.     public static final CharSequenceTranslator ESCAPE_HTML3 =
  252.         new AggregateTranslator(
  253.             new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
  254.             new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE())
  255.         );

  256.     /**
  257.      * Translator object for escaping HTML version 4.0.
  258.      *
  259.      * While {@link #escapeHtml4(String)} is the expected method of use, this
  260.      * object allows the HTML escaping functionality to be used
  261.      * as the foundation for a custom translator.
  262.      *
  263.      * @since 3.0
  264.      */
  265.     public static final CharSequenceTranslator ESCAPE_HTML4 =
  266.         new AggregateTranslator(
  267.             new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
  268.             new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()),
  269.             new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE())
  270.         );

  271.     /* UNESCAPE TRANSLATORS */

  272.     /**
  273.      * Translator object for escaping individual Comma Separated Values; it is
  274.      * the {@code CsvEscaper} instance used by {@link #escapeCsv(String)}.
  275.      *
  276.      * <p>While {@link #escapeCsv(String)} is the expected method of use, this
  277.      * object allows the CSV escaping functionality to be used
  278.      * as the foundation for a custom translator.</p>
  279.      * @since 3.0
  280.      */
  281.     public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper();

  282.     /**
  283.      * Translator object for unescaping escaped Java.
  284.      *
  285.      * While {@link #unescapeJava(String)} is the expected method of use, this
  286.      * object allows the Java unescaping functionality to be used
  287.      * as the foundation for a custom translator.
  288.      *
  289.      * @since 3.0
  290.      */
  291.     // TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)?
  292.     public static final CharSequenceTranslator UNESCAPE_JAVA =
  293.         new AggregateTranslator(
  294.             new OctalUnescaper(),     // .between('\1', '\377'),
  295.             new UnicodeUnescaper(),
  296.             new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()),
  297.             new LookupTranslator(
  298.                       new String[][] {
  299.                             {"\\\\", "\\"},
  300.                             {"\\\"", "\""},
  301.                             {"\\'", "'"},
  302.                             {"\\", ""}
  303.                       })
  304.         );

  305.     /**
  306.      * Translator object for unescaping escaped EcmaScript; this constant refers
  307.      * to the same translator instance as {@link #UNESCAPE_JAVA}.
  308.      *
  309.      * <p>While {@link #unescapeEcmaScript(String)} is the expected method of use, this
  310.      * object allows the EcmaScript unescaping functionality to be used
  311.      * as the foundation for a custom translator.</p>
  312.      * @since 3.0
  313.      */
  314.     public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA;

  315.     /**
  316.      * Translator object for unescaping escaped Json; this constant refers
  317.      * to the same translator instance as {@link #UNESCAPE_JAVA}.
  318.      *
  319.      * <p>While {@link #unescapeJson(String)} is the expected method of use, this
  320.      * object allows the Json unescaping functionality to be used
  321.      * as the foundation for a custom translator.</p>
  322.      * @since 3.2
  323.      */
  324.     public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA;

  325.     /**
  326.      * Translator object for unescaping escaped HTML 3.0.
  327.      *
  328.      * While {@link #unescapeHtml3(String)} is the expected method of use, this
  329.      * object allows the HTML unescaping functionality to be used
  330.      * as the foundation for a custom translator.
  331.      *
  332.      * @since 3.0
  333.      */
  334.     public static final CharSequenceTranslator UNESCAPE_HTML3 =
  335.         new AggregateTranslator(
  336.             new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
  337.             new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
  338.             new NumericEntityUnescaper()
  339.         );

  340.     /**
  341.      * Translator object for unescaping escaped HTML 4.0.
  342.      *
  343.      * While {@link #unescapeHtml4(String)} is the expected method of use, this
  344.      * object allows the HTML unescaping functionality to be used
  345.      * as the foundation for a custom translator.
  346.      *
  347.      * @since 3.0
  348.      */
  349.     public static final CharSequenceTranslator UNESCAPE_HTML4 =
  350.         new AggregateTranslator(
  351.             new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
  352.             new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
  353.             new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()),
  354.             new NumericEntityUnescaper()
  355.         );

  356.     /**
  357.      * Translator object for unescaping escaped XML.
  358.      *
  359.      * While {@link #unescapeXml(String)} is the expected method of use, this
  360.      * object allows the XML unescaping functionality to be used
  361.      * as the foundation for a custom translator.
  362.      *
  363.      * @since 3.0
  364.      */
  365.     public static final CharSequenceTranslator UNESCAPE_XML =
  366.         new AggregateTranslator(
  367.             new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
  368.             new LookupTranslator(EntityArrays.APOS_UNESCAPE()),
  369.             new NumericEntityUnescaper()
  370.         );

  371.     /**
  372.      * Translator object for unescaping escaped Comma Separated Value entries; it
  373.      * is the {@code CsvUnescaper} instance used by {@link #unescapeCsv(String)}.
  374.      *
  375.      * <p>While {@link #unescapeCsv(String)} is the expected method of use, this
  376.      * object allows the CSV unescaping functionality to be used
  377.      * as the foundation for a custom translator.</p>
  378.      * @since 3.0
  379.      */
  380.     public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper();

  381.     /* Helper functions */

  382.     /**
  383.      * Returns a {@link String} value for a CSV column enclosed in double quotes,
  384.      * if required.
  385.      *
  386.      * <p>If the value contains a comma, newline or double quote, then the
  387.      *    String value is returned enclosed in double quotes.</p>
  388.      *
  389.      * <p>Any double quote characters in the value are escaped with another double quote.</p>
  390.      *
  391.      * <p>If the value does not contain a comma, newline or double quote, then the
  392.      *    String value is returned unchanged.</p>
  393.      *
  394.      * see <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
  395.      * <a href="https://datatracker.ietf.org/doc/html/rfc4180">RFC 4180</a>.
  396.      *
  397.      * @param input the input CSV column String, may be null
  398.      * @return the input String, enclosed in double quotes if the value contains a comma,
  399.      * newline or double quote, {@code null} if null string input
  400.      * @since 2.4
  401.      */
  402.     public static final String escapeCsv(final String input) {
  403.         return ESCAPE_CSV.translate(input);
  404.     }

  405.     /**
  406.      * Escapes the characters in a {@link String} using EcmaScript String rules.
  407.      * <p>Escapes any values it finds into their EcmaScript String form.
  408.      * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
  409.      *
  410.      * <p>So a tab becomes the characters {@code '\\'} and
  411.      * {@code 't'}.</p>
  412.      *
  413.      * <p>The only difference between Java strings and EcmaScript strings
  414.      * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p>
  415.      *
  416.      * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects.</p>
  417.      *
  418.      * <p>Example:</p>
  419.      * <pre>
  420.      * input string: He didn't say, "Stop!"
  421.      * output string: He didn\'t say, \"Stop!\"
  422.      * </pre>
  423.      *
  424.      * @param input  String to escape values in, may be null
  425.      * @return String with escaped values, {@code null} if null string input
  426.      * @since 3.0
  427.      */
  428.     public static final String escapeEcmaScript(final String input) {
  429.         return ESCAPE_ECMASCRIPT.translate(input);
  430.     }

  431.     /**
  432.      * Escapes the characters in a {@link String} using HTML entities.
  433.      * <p>Supports only the HTML 3.0 entities.</p>
  434.      *
  435.      * @param input  the {@link String} to escape, may be null
  436.      * @return a new escaped {@link String}, {@code null} if null string input
  437.      * @since 3.0
  438.      */
  439.     public static final String escapeHtml3(final String input) {
  440.         return ESCAPE_HTML3.translate(input);
  441.     }

  442.     /**
  443.      * Escapes the characters in a {@link String} using HTML entities.
  444.      *
  445.      * <p>
  446.      * For example:
  447.      * </p>
  448.      * <p>{@code "bread" &amp; "butter"}</p>
  449.      * becomes:
  450.      * <p>
  451.      * {@code &amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;}.
  452.      * </p>
  453.      *
  454.      * <p>Supports all known HTML 4.0 entities, including funky accents.
  455.      * Note that the commonly used apostrophe escape character (&amp;apos;)
  456.      * is not a legal entity and so is not supported).</p>
  457.      *
  458.      * @param input  the {@link String} to escape, may be null
  459.      * @return a new escaped {@link String}, {@code null} if null string input
  460.      * @see <a href="https://web.archive.org/web/20060225074150/https://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
  461.      * @see <a href="https://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
  462.      * @see <a href="https://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
  463.      * @see <a href="https://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
  464.      * @see <a href="https://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
  465.      * @since 3.0
  466.      */
  467.     public static final String escapeHtml4(final String input) {
  468.         return ESCAPE_HTML4.translate(input);
  469.     }

  470.     /**
  471.      * Escapes the characters in a {@link String} using Java String rules.
  472.      *
  473.      * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
  474.      *
  475.      * <p>So a tab becomes the characters {@code '\\'} and
  476.      * {@code 't'}.</p>
  477.      *
  478.      * <p>The only difference between Java strings and JavaScript strings
  479.      * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p>
  480.      *
  481.      * <p>Example:</p>
  482.      * <pre>
  483.      * input string: He didn't say, "Stop!"
  484.      * output string: He didn't say, \"Stop!\"
  485.      * </pre>
  486.      *
  487.      * @param input  String to escape values in, may be null
  488.      * @return String with escaped values, {@code null} if null string input
  489.      */
  490.     public static final String escapeJava(final String input) {
  491.         return ESCAPE_JAVA.translate(input);
  492.     }

  493.     /**
  494.      * Escapes the characters in a {@link String} using Json String rules.
  495.      * <p>Escapes any values it finds into their Json String form.
  496.      * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
  497.      *
  498.      * <p>So a tab becomes the characters {@code '\\'} and
  499.      * {@code 't'}.</p>
  500.      *
  501.      * <p>The only difference between Java strings and Json strings
  502.      * is that in Json, forward-slash (/) is escaped.</p>
  503.      *
  504.      * <p>See https://www.ietf.org/rfc/rfc4627.txt for further details.</p>
  505.      *
  506.      * <p>Example:</p>
  507.      * <pre>
  508.      * input string: He didn't say, "Stop!"
  509.      * output string: He didn't say, \"Stop!\"
  510.      * </pre>
  511.      *
  512.      * @param input  String to escape values in, may be null
  513.      * @return String with escaped values, {@code null} if null string input
  514.      * @since 3.2
  515.      */
  516.     public static final String escapeJson(final String input) {
  517.         return ESCAPE_JSON.translate(input);
  518.     }

  519.     /**
  520.      * Escapes the characters in a {@link String} using XML entities.
  521.      *
  522.      * <p>For example: {@code "bread" & "butter"} =&gt;
  523.      * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
  524.      * </p>
  525.      *
  526.      * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
  527.      * Does not support DTDs or external entities.</p>
  528.      *
  529.      * <p>Note that Unicode characters greater than 0x7f are as of 3.0, no longer
  530.      *    escaped. If you still wish this functionality, you can achieve it
  531.      *    via the following:
  532.      * {@code StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE));}</p>
  533.      *
  534.      * @param input  the {@link String} to escape, may be null
  535.      * @return a new escaped {@link String}, {@code null} if null string input
  536.      * @see #unescapeXml(String)
  537.      * @deprecated use {@link #escapeXml10(java.lang.String)} or {@link #escapeXml11(java.lang.String)} instead.
  538.      */
  539.     @Deprecated
  540.     public static final String escapeXml(final String input) {
  541.         return ESCAPE_XML.translate(input);
  542.     }

  543.     /**
  544.      * Escapes the characters in a {@link String} using XML entities.
  545.      * <p>
  546.      * For example:
  547.      * </p>
  548.      *
  549.      * <pre>{@code
  550.      * "bread" & "butter"
  551.      * }</pre>
  552.      * <p>
  553.      * converts to:
  554.      * </p>
  555.      *
  556.      * <pre>
  557.      * {@code
  558.      * &quot;bread&quot; &amp; &quot;butter&quot;
  559.      * }
  560.      * </pre>
  561.      *
  562.      * <p>
  563.      * Note that XML 1.0 is a text-only format: it cannot represent control characters or unpaired Unicode surrogate code points, even after escaping. The
  564.      * method {@code escapeXml10} will remove characters that do not fit in the following ranges:
  565.      * </p>
  566.      *
  567.      * <p>
  568.      * {@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}
  569.      * </p>
  570.      *
  571.      * <p>
  572.      * Though not strictly necessary, {@code escapeXml10} will escape characters in the following ranges:
  573.      * </p>
  574.      *
  575.      * <p>
  576.      * {@code [#x7F-#x84] | [#x86-#x9F]}
  577.      * </p>
  578.      *
  579.      * <p>
  580.      * The returned string can be inserted into a valid XML 1.0 or XML 1.1 document. If you want to allow more non-text characters in an XML 1.1 document, use
  581.      * {@link #escapeXml11(String)}.
  582.      * </p>
  583.      *
  584.      * @param input the {@link String} to escape, may be null
  585.      * @return a new escaped {@link String}, {@code null} if null string input
  586.      * @see #unescapeXml(String)
  587.      * @since 3.3
  588.      */
  589.     public static String escapeXml10(final String input) {
  590.         return ESCAPE_XML10.translate(input);
  591.     }

  592.     /**
  593.      * Escapes the characters in a {@link String} using XML entities.
  594.      *
  595.      * <p>For example: {@code "bread" & "butter"} =&gt;
  596.      * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
  597.      * </p>
  598.      *
  599.      * <p>XML 1.1 can represent certain control characters, but it cannot represent
  600.      * the null byte or unpaired Unicode surrogate code points, even after escaping.
  601.      * {@code escapeXml11} will remove characters that do not fit in the following
  602.      * ranges:</p>
  603.      *
  604.      * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
  605.      *
  606.      * <p>{@code escapeXml11} will escape characters in the following ranges:</p>
  607.      *
  608.      * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p>
  609.      *
  610.      * <p>The returned string can be inserted into a valid XML 1.1 document. Do not
  611.      * use it for XML 1.0 documents.</p>
  612.      *
  613.      * @param input  the {@link String} to escape, may be null
  614.      * @return a new escaped {@link String}, {@code null} if null string input
  615.      * @see #unescapeXml(String)
  616.      * @since 3.3
  617.      */
  618.     public static String escapeXml11(final String input) {
  619.         return ESCAPE_XML11.translate(input);
  620.     }

  621.     /**
  622.      * Returns a {@link String} value for an unescaped CSV column.
  623.      *
  624.      * <p>If the value is enclosed in double quotes, and contains a comma, newline
  625.      *    or double quote, then quotes are removed.
  626.      * </p>
  627.      *
  628.      * <p>Any double quote escaped characters (a pair of double quotes) are unescaped
  629.      *    to just one double quote.</p>
  630.      *
  631.      * <p>If the value is not enclosed in double quotes, or is and does not contain a
  632.      *    comma, newline or double quote, then the String value is returned unchanged.</p>
  633.      *
  634.      * see <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
  635.      * <a href="https://datatracker.ietf.org/doc/html/rfc4180">RFC 4180</a>.
  636.      *
  637.      * @param input the input CSV column String, may be null
  638.      * @return the input String, with enclosing double quotes removed and embedded double
  639.      * quotes unescaped, {@code null} if null string input
  640.      * @since 2.4
  641.      */
  642.     public static final String unescapeCsv(final String input) {
  643.         return UNESCAPE_CSV.translate(input);
  644.     }

  645.     /**
  646.      * Unescapes any EcmaScript literals found in the {@link String}.
  647.      *
  648.      * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
  649.      * into a newline character, unless the {@code '\'} is preceded by another
  650.      * {@code '\'}.</p>
  651.      *
  652.      * @see #unescapeJava(String)
  653.      * @param input  the {@link String} to unescape, may be null
  654.      * @return A new unescaped {@link String}, {@code null} if null string input
  655.      * @since 3.0
  656.      */
  657.     public static final String unescapeEcmaScript(final String input) {
  658.         return UNESCAPE_ECMASCRIPT.translate(input);
  659.     }

  660.     /**
  661.      * Unescapes a string containing entity escapes to a string
  662.      * containing the actual Unicode characters corresponding to the
  663.      * escapes. Supports only HTML 3.0 entities.
  664.      *
  665.      * @param input  the {@link String} to unescape, may be null
  666.      * @return a new unescaped {@link String}, {@code null} if null string input
  667.      * @since 3.0
  668.      */
  669.     public static final String unescapeHtml3(final String input) {
  670.         return UNESCAPE_HTML3.translate(input);
  671.     }

  672.     /**
  673.      * Unescapes a string containing entity escapes to a string
  674.      * containing the actual Unicode characters corresponding to the
  675.      * escapes. Supports HTML 4.0 entities.
  676.      *
  677.      * <p>For example, the string {@code "&lt;Fran&ccedil;ais&gt;"}
  678.      * will become {@code "<Français>"}</p>
  679.      *
  680.      * <p>If an entity is unrecognized, it is left alone, and inserted
  681.      * verbatim into the result string. e.g. {@code "&gt;&zzzz;x"} will
  682.      * become {@code ">&zzzz;x"}.</p>
  683.      *
  684.      * @param input  the {@link String} to unescape, may be null
  685.      * @return a new unescaped {@link String}, {@code null} if null string input
  686.      * @since 3.0
  687.      */
  688.     public static final String unescapeHtml4(final String input) {
  689.         return UNESCAPE_HTML4.translate(input);
  690.     }

  691.     /**
  692.      * Unescapes any Java literals found in the {@link String}.
  693.      * For example, it will turn a sequence of {@code '\'} and
  694.      * {@code 'n'} into a newline character, unless the {@code '\'}
  695.      * is preceded by another {@code '\'}.
  696.      *
  697.      * @param input  the {@link String} to unescape, may be null
  698.      * @return a new unescaped {@link String}, {@code null} if null string input
  699.      */
  700.     public static final String unescapeJava(final String input) {
  701.         return UNESCAPE_JAVA.translate(input);
  702.     }

  703.     /**
  704.      * Unescapes any Json literals found in the {@link String}.
  705.      *
  706.      * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
  707.      * into a newline character, unless the {@code '\'} is preceded by another
  708.      * {@code '\'}.</p>
  709.      *
  710.      * @see #unescapeJava(String)
  711.      * @param input  the {@link String} to unescape, may be null
  712.      * @return A new unescaped {@link String}, {@code null} if null string input
  713.      * @since 3.2
  714.      */
  715.     public static final String unescapeJson(final String input) {
  716.         return UNESCAPE_JSON.translate(input);
  717.     }

  718.     /**
  719.      * Unescapes a string containing XML entity escapes to a string
  720.      * containing the actual Unicode characters corresponding to the
  721.      * escapes.
  722.      *
  723.      * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
  724.      * Does not support DTDs or external entities.</p>
  725.      *
  726.      * <p>Note that numerical \\u Unicode codes are unescaped to their respective
  727.      *    Unicode characters. This may change in future releases.</p>
  728.      *
  729.      * @param input  the {@link String} to unescape, may be null
  730.      * @return a new unescaped {@link String}, {@code null} if null string input
  731.      * @see #escapeXml(String)
  732.      * @see #escapeXml10(String)
  733.      * @see #escapeXml11(String)
  734.      */
  735.     public static final String unescapeXml(final String input) {
  736.         return UNESCAPE_XML.translate(input);
  737.     }

  738.     /**
  739.      * {@link StringEscapeUtils} instances should NOT be constructed in
  740.      * standard programming.
  741.      *
  742.      * <p>Instead, the class should be used as:</p>
  743.      * <pre>StringEscapeUtils.escapeJava("foo");</pre>
  744.      *
  745.      * <p>This constructor is public to permit tools that require a JavaBean
  746.      * instance to operate.</p>
  747.      *
  748.      * @deprecated TODO Make private in 4.0.
  749.      */
  750.     @Deprecated
  751.     public StringEscapeUtils() {
  752.         // empty
  753.     }

  754. }