StringEscapeUtils.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.text;

  18. import java.io.IOException;
  19. import java.io.Writer;
  20. import java.util.Collections;
  21. import java.util.HashMap;
  22. import java.util.Map;

  23. import org.apache.commons.lang3.StringUtils;
  24. import org.apache.commons.text.translate.AggregateTranslator;
  25. import org.apache.commons.text.translate.CharSequenceTranslator;
  26. import org.apache.commons.text.translate.CsvTranslators;
  27. import org.apache.commons.text.translate.EntityArrays;
  28. import org.apache.commons.text.translate.JavaUnicodeEscaper;
  29. import org.apache.commons.text.translate.LookupTranslator;
  30. import org.apache.commons.text.translate.NumericEntityEscaper;
  31. import org.apache.commons.text.translate.NumericEntityUnescaper;
  32. import org.apache.commons.text.translate.OctalUnescaper;
  33. import org.apache.commons.text.translate.UnicodeUnescaper;
  34. import org.apache.commons.text.translate.UnicodeUnpairedSurrogateRemover;

  35. /**
  36.  * <p>
  37.  * Escapes and unescapes {@code String}s for Java, JavaScript, JSON, HTML, XML, CSV and XSI shell commands.
  38.  * </p>
  39.  *
  40.  * <p>
  41.  * #ThreadSafe#
  42.  * </p>
  43.  *
  44.  * <p>
  45.  * This code has been adapted from Apache Commons Lang 3.5.
  46.  * </p>
  47.  *
  48.  * @since 1.0
  49.  */
  50. public class StringEscapeUtils {

  51.     /* ESCAPE TRANSLATORS */

  52.     /**
  53.      * Convenience wrapper for {@link StringBuilder} providing escape methods.
  54.      *
  55.      * <p>Example:</p>
  56.      * <pre>
  57.      * StringEscapeUtils.builder(StringEscapeUtils.ESCAPE_HTML4)
  58.      *      .append("&lt;p&gt;")
  59.      *      .escape("This is paragraph 1 and special chars like &amp; get escaped.")
  60.      *      .append("&lt;/p&gt;&lt;p&gt;")
  61.      *      .escape("This is paragraph 2 &amp; more...")
  62.      *      .append("&lt;/p&gt;")
  63.      *      .toString()
  64.      * </pre>
  65.      */
  66.     public static final class Builder {

  67.         /**
  68.          * StringBuilder to be used in the Builder class.
  69.          */
  70.         private final StringBuilder sb;

  71.         /**
  72.          * CharSequenceTranslator to be used in the Builder class.
  73.          */
  74.         private final CharSequenceTranslator translator;

  75.         /**
  76.          * Builder constructor.
  77.          *
  78.          * @param translator a CharSequenceTranslator.
  79.          */
  80.         private Builder(final CharSequenceTranslator translator) {
  81.             this.sb = new StringBuilder();
  82.             this.translator = translator;
  83.         }

  84.         /**
  85.          * Literal append, no escaping being done.
  86.          *
  87.          * @param input the String to append
  88.          * @return {@code this}, to enable chaining
  89.          */
  90.         public Builder append(final String input) {
  91.             sb.append(input);
  92.             return this;
  93.         }

  94.         /**
  95.          * Escape {@code input} according to the given {@link CharSequenceTranslator}.
  96.          *
  97.          * @param input the String to escape
  98.          * @return {@code this}, to enable chaining
  99.          */
  100.         public Builder escape(final String input) {
  101.             sb.append(translator.translate(input));
  102.             return this;
  103.         }

  104.         /**
  105.          * Return the escaped string.
  106.          *
  107.          * @return The escaped string
  108.          */
  109.         @Override
  110.         public String toString() {
  111.             return sb.toString();
  112.         }
  113.     }
  114.     /**
  115.      * Translator object for unescaping backslash escaped entries.
  116.      */
  117.     static class XsiUnescaper extends CharSequenceTranslator {

  118.         /**
  119.          * Escaped backslash constant.
  120.          */
  121.         private static final char BACKSLASH = '\\';

  122.         @Override
  123.         public int translate(final CharSequence input, final int index, final Writer writer) throws IOException {

  124.             if (index != 0) {
  125.                 throw new IllegalStateException("XsiUnescaper should never reach the [1] index");
  126.             }

  127.             final String s = input.toString();

  128.             int segmentStart = 0;
  129.             int searchOffset = 0;
  130.             while (true) {
  131.                 final int pos = s.indexOf(BACKSLASH, searchOffset);
  132.                 if (pos == -1) {
  133.                     if (segmentStart < s.length()) {
  134.                         writer.write(s.substring(segmentStart));
  135.                     }
  136.                     break;
  137.                 }
  138.                 if (pos > segmentStart) {
  139.                     writer.write(s.substring(segmentStart, pos));
  140.                 }
  141.                 segmentStart = pos + 1;
  142.                 searchOffset = pos + 2;
  143.             }

  144.             return Character.codePointCount(input, 0, input.length());
  145.         }
  146.     }

  147.     /**
  148.      * Translator object for escaping Java.
  149.      *
  150.      * While {@link #escapeJava(String)} is the expected method of use, this
  151.      * object allows the Java escaping functionality to be used
  152.      * as the foundation for a custom translator.
  153.      */
  154.     public static final CharSequenceTranslator ESCAPE_JAVA;
  155.     static {
  156.         final Map<CharSequence, CharSequence> escapeJavaMap = new HashMap<>();
  157.         escapeJavaMap.put("\"", "\\\"");
  158.         escapeJavaMap.put("\\", "\\\\");
  159.         ESCAPE_JAVA = new AggregateTranslator(
  160.                 new LookupTranslator(Collections.unmodifiableMap(escapeJavaMap)),
  161.                 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE),
  162.                 JavaUnicodeEscaper.outsideOf(32, 0x7f)
  163.         );
  164.     }

  165.     /**
  166.      * Translator object for escaping EcmaScript/JavaScript.
  167.      *
  168.      * While {@link #escapeEcmaScript(String)} is the expected method of use, this
  169.      * object allows the EcmaScript escaping functionality to be used
  170.      * as the foundation for a custom translator.
  171.      */
  172.     public static final CharSequenceTranslator ESCAPE_ECMASCRIPT;
  173.     static {
  174.         final Map<CharSequence, CharSequence> escapeEcmaScriptMap = new HashMap<>();
  175.         escapeEcmaScriptMap.put("'", "\\'");
  176.         escapeEcmaScriptMap.put("\"", "\\\"");
  177.         escapeEcmaScriptMap.put("\\", "\\\\");
  178.         escapeEcmaScriptMap.put("/", "\\/");
  179.         ESCAPE_ECMASCRIPT = new AggregateTranslator(
  180.                 new LookupTranslator(Collections.unmodifiableMap(escapeEcmaScriptMap)),
  181.                 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE),
  182.                 JavaUnicodeEscaper.outsideOf(32, 0x7f)
  183.         );
  184.     }

  185.     /**
  186.      * Translator object for escaping Json.
  187.      *
  188.      * While {@link #escapeJson(String)} is the expected method of use, this
  189.      * object allows the Json escaping functionality to be used
  190.      * as the foundation for a custom translator.
  191.      */
  192.     public static final CharSequenceTranslator ESCAPE_JSON;
  193.     static {
  194.         final Map<CharSequence, CharSequence> escapeJsonMap = new HashMap<>();
  195.         escapeJsonMap.put("\"", "\\\"");
  196.         escapeJsonMap.put("\\", "\\\\");
  197.         escapeJsonMap.put("/", "\\/");
  198.         ESCAPE_JSON = new AggregateTranslator(
  199.                 new LookupTranslator(Collections.unmodifiableMap(escapeJsonMap)),
  200.                 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE),
  201.                 JavaUnicodeEscaper.outsideOf(32, 0x7e)
  202.         );
  203.     }

  204.     /**
  205.      * Translator object for escaping XML 1.0.
  206.      *
  207.      * While {@link #escapeXml10(String)} is the expected method of use, this
  208.      * object allows the XML escaping functionality to be used
  209.      * as the foundation for a custom translator.
  210.      */
  211.     public static final CharSequenceTranslator ESCAPE_XML10;
  212.     static {
  213.         final Map<CharSequence, CharSequence> escapeXml10Map = new HashMap<>();
  214.         escapeXml10Map.put("\u0000", StringUtils.EMPTY);
  215.         escapeXml10Map.put("\u0001", StringUtils.EMPTY);
  216.         escapeXml10Map.put("\u0002", StringUtils.EMPTY);
  217.         escapeXml10Map.put("\u0003", StringUtils.EMPTY);
  218.         escapeXml10Map.put("\u0004", StringUtils.EMPTY);
  219.         escapeXml10Map.put("\u0005", StringUtils.EMPTY);
  220.         escapeXml10Map.put("\u0006", StringUtils.EMPTY);
  221.         escapeXml10Map.put("\u0007", StringUtils.EMPTY);
  222.         escapeXml10Map.put("\u0008", StringUtils.EMPTY);
  223.         escapeXml10Map.put("\u000b", StringUtils.EMPTY);
  224.         escapeXml10Map.put("\u000c", StringUtils.EMPTY);
  225.         escapeXml10Map.put("\u000e", StringUtils.EMPTY);
  226.         escapeXml10Map.put("\u000f", StringUtils.EMPTY);
  227.         escapeXml10Map.put("\u0010", StringUtils.EMPTY);
  228.         escapeXml10Map.put("\u0011", StringUtils.EMPTY);
  229.         escapeXml10Map.put("\u0012", StringUtils.EMPTY);
  230.         escapeXml10Map.put("\u0013", StringUtils.EMPTY);
  231.         escapeXml10Map.put("\u0014", StringUtils.EMPTY);
  232.         escapeXml10Map.put("\u0015", StringUtils.EMPTY);
  233.         escapeXml10Map.put("\u0016", StringUtils.EMPTY);
  234.         escapeXml10Map.put("\u0017", StringUtils.EMPTY);
  235.         escapeXml10Map.put("\u0018", StringUtils.EMPTY);
  236.         escapeXml10Map.put("\u0019", StringUtils.EMPTY);
  237.         escapeXml10Map.put("\u001a", StringUtils.EMPTY);
  238.         escapeXml10Map.put("\u001b", StringUtils.EMPTY);
  239.         escapeXml10Map.put("\u001c", StringUtils.EMPTY);
  240.         escapeXml10Map.put("\u001d", StringUtils.EMPTY);
  241.         escapeXml10Map.put("\u001e", StringUtils.EMPTY);
  242.         escapeXml10Map.put("\u001f", StringUtils.EMPTY);
  243.         escapeXml10Map.put("\ufffe", StringUtils.EMPTY);
  244.         escapeXml10Map.put("\uffff", StringUtils.EMPTY);
  245.         ESCAPE_XML10 = new AggregateTranslator(
  246.                 new LookupTranslator(EntityArrays.BASIC_ESCAPE),
  247.                 new LookupTranslator(EntityArrays.APOS_ESCAPE),
  248.                 new LookupTranslator(Collections.unmodifiableMap(escapeXml10Map)),
  249.                 NumericEntityEscaper.between(0x7f, 0x84),
  250.                 NumericEntityEscaper.between(0x86, 0x9f),
  251.                 new UnicodeUnpairedSurrogateRemover()
  252.         );
  253.     }

  254.     /**
  255.      * Translator object for escaping XML 1.1.
  256.      *
  257.      * While {@link #escapeXml11(String)} is the expected method of use, this
  258.      * object allows the XML escaping functionality to be used
  259.      * as the foundation for a custom translator.
  260.      */
  261.     public static final CharSequenceTranslator ESCAPE_XML11;

  262.     static {
  263.         final Map<CharSequence, CharSequence> escapeXml11Map = new HashMap<>();
  264.         escapeXml11Map.put("\u0000", StringUtils.EMPTY);
  265.         escapeXml11Map.put("\u000b", "&#11;");
  266.         escapeXml11Map.put("\u000c", "&#12;");
  267.         escapeXml11Map.put("\ufffe", StringUtils.EMPTY);
  268.         escapeXml11Map.put("\uffff", StringUtils.EMPTY);
  269.         ESCAPE_XML11 = new AggregateTranslator(
  270.                 new LookupTranslator(EntityArrays.BASIC_ESCAPE),
  271.                 new LookupTranslator(EntityArrays.APOS_ESCAPE),
  272.                 new LookupTranslator(Collections.unmodifiableMap(escapeXml11Map)),
  273.                 NumericEntityEscaper.between(0x1, 0x8),
  274.                 NumericEntityEscaper.between(0xe, 0x1f),
  275.                 NumericEntityEscaper.between(0x7f, 0x84),
  276.                 NumericEntityEscaper.between(0x86, 0x9f),
  277.                 new UnicodeUnpairedSurrogateRemover()
  278.         );
  279.     }

  280.     /**
  281.      * Translator object for escaping HTML version 3.0.
  282.      *
  283.      * While {@link #escapeHtml3(String)} is the expected method of use, this
  284.      * object allows the HTML escaping functionality to be used
  285.      * as the foundation for a custom translator.
  286.      */
  287.     public static final CharSequenceTranslator ESCAPE_HTML3 =
  288.             new AggregateTranslator(
  289.                     new LookupTranslator(EntityArrays.BASIC_ESCAPE),
  290.                     new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE)
  291.             );

  292.     /**
  293.      * Translator object for escaping HTML version 4.0.
  294.      *
  295.      * While {@link #escapeHtml4(String)} is the expected method of use, this
  296.      * object allows the HTML escaping functionality to be used
  297.      * as the foundation for a custom translator.
  298.      */
  299.     public static final CharSequenceTranslator ESCAPE_HTML4 =
  300.             new AggregateTranslator(
  301.                     new LookupTranslator(EntityArrays.BASIC_ESCAPE),
  302.                     new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE),
  303.                     new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE)
  304.             );
  305.     /**
  306.      * Translator object for escaping individual Comma Separated Values.
  307.      *
  308.      * While {@link #escapeCsv(String)} is the expected method of use, this
  309.      * object allows the CSV escaping functionality to be used
  310.      * as the foundation for a custom translator.
  311.      */
    // A single shared CsvEscaper instance backs both this constant and escapeCsv(String).
    public static final CharSequenceTranslator ESCAPE_CSV = new CsvTranslators.CsvEscaper();

  313.     /* UNESCAPE TRANSLATORS */

  314.     /**
  315.      * Translator object for escaping Shell command language.
  316.      *
  317.      * @see <a href="https://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a>
  318.      */
  319.     public static final CharSequenceTranslator ESCAPE_XSI;
  320.     static {
  321.         final Map<CharSequence, CharSequence> escapeXsiMap = new HashMap<>();
  322.         escapeXsiMap.put("|", "\\|");
  323.         escapeXsiMap.put("&", "\\&");
  324.         escapeXsiMap.put(";", "\\;");
  325.         escapeXsiMap.put("<", "\\<");
  326.         escapeXsiMap.put(">", "\\>");
  327.         escapeXsiMap.put("(", "\\(");
  328.         escapeXsiMap.put(")", "\\)");
  329.         escapeXsiMap.put("$", "\\$");
  330.         escapeXsiMap.put("`", "\\`");
  331.         escapeXsiMap.put("\\", "\\\\");
  332.         escapeXsiMap.put("\"", "\\\"");
  333.         escapeXsiMap.put("'", "\\'");
  334.         escapeXsiMap.put(" ", "\\ ");
  335.         escapeXsiMap.put("\t", "\\\t");
  336.         escapeXsiMap.put("\r\n", StringUtils.EMPTY);
  337.         escapeXsiMap.put("\n", StringUtils.EMPTY);
  338.         escapeXsiMap.put("*", "\\*");
  339.         escapeXsiMap.put("?", "\\?");
  340.         escapeXsiMap.put("[", "\\[");
  341.         escapeXsiMap.put("#", "\\#");
  342.         escapeXsiMap.put("~", "\\~");
  343.         escapeXsiMap.put("=", "\\=");
  344.         escapeXsiMap.put("%", "\\%");
  345.         ESCAPE_XSI = new LookupTranslator(
  346.                 Collections.unmodifiableMap(escapeXsiMap)
  347.         );
  348.     }

  349.     /**
  350.      * Translator object for unescaping escaped Java.
  351.      *
  352.      * While {@link #unescapeJava(String)} is the expected method of use, this
  353.      * object allows the Java unescaping functionality to be used
  354.      * as the foundation for a custom translator.
  355.      */
  356.     public static final CharSequenceTranslator UNESCAPE_JAVA;

  357.     static {
  358.         final Map<CharSequence, CharSequence> unescapeJavaMap = new HashMap<>();
  359.         unescapeJavaMap.put("\\\\", "\\");
  360.         unescapeJavaMap.put("\\\"", "\"");
  361.         unescapeJavaMap.put("\\'", "'");
  362.         unescapeJavaMap.put("\\", StringUtils.EMPTY);
  363.         UNESCAPE_JAVA = new AggregateTranslator(
  364.                 new OctalUnescaper(),     // .between('\1', '\377'),
  365.                 new UnicodeUnescaper(),
  366.                 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE),
  367.                 new LookupTranslator(Collections.unmodifiableMap(unescapeJavaMap))
  368.         );
  369.     }

  370.     /**
  371.      * Translator object for unescaping escaped EcmaScript.
  372.      *
  373.      * While {@link #unescapeEcmaScript(String)} is the expected method of use, this
  374.      * object allows the EcmaScript unescaping functionality to be used
  375.      * as the foundation for a custom translator.
  376.      */
    // Alias: this is the very same translator instance as UNESCAPE_JAVA.
    public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA;

  378.     /**
  379.      * Translator object for unescaping escaped Json.
  380.      *
  381.      * While {@link #unescapeJson(String)} is the expected method of use, this
  382.      * object allows the Json unescaping functionality to be used
  383.      * as the foundation for a custom translator.
  384.      */
    // Alias: this is the very same translator instance as UNESCAPE_JAVA.
    public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA;

  386.     /**
  387.      * Translator object for unescaping escaped HTML 3.0.
  388.      *
  389.      * While {@link #unescapeHtml3(String)} is the expected method of use, this
  390.      * object allows the HTML unescaping functionality to be used
  391.      * as the foundation for a custom translator.
  392.      */
  393.     public static final CharSequenceTranslator UNESCAPE_HTML3 =
  394.             new AggregateTranslator(
  395.                     new LookupTranslator(EntityArrays.BASIC_UNESCAPE),
  396.                     new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE),
  397.                     new NumericEntityUnescaper()
  398.             );

  399.     /**
  400.      * Translator object for unescaping escaped HTML 4.0.
  401.      *
  402.      * While {@link #unescapeHtml4(String)} is the expected method of use, this
  403.      * object allows the HTML unescaping functionality to be used
  404.      * as the foundation for a custom translator.
  405.      */
  406.     public static final CharSequenceTranslator UNESCAPE_HTML4 =
  407.             new AggregateTranslator(
  408.                     new LookupTranslator(EntityArrays.BASIC_UNESCAPE),
  409.                     new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE),
  410.                     new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE),
  411.                     new NumericEntityUnescaper()
  412.             );

  413.     /**
  414.      * Translator object for unescaping escaped XML.
  415.      *
  416.      * While {@link #unescapeXml(String)} is the expected method of use, this
  417.      * object allows the XML unescaping functionality to be used
  418.      * as the foundation for a custom translator.
  419.      */
  420.     public static final CharSequenceTranslator UNESCAPE_XML =
  421.             new AggregateTranslator(
  422.                     new LookupTranslator(EntityArrays.BASIC_UNESCAPE),
  423.                     new LookupTranslator(EntityArrays.APOS_UNESCAPE),
  424.                     new NumericEntityUnescaper()
  425.             );

  426.     /**
  427.      * Translator object for unescaping escaped Comma Separated Value entries.
  428.      *
  429.      * While {@link #unescapeCsv(String)} is the expected method of use, this
  430.      * object allows the CSV unescaping functionality to be used
  431.      * as the foundation for a custom translator.
  432.      */
    // A single shared CsvUnescaper instance is held by this constant.
    public static final CharSequenceTranslator UNESCAPE_CSV = new CsvTranslators.CsvUnescaper();

  434.     /* Helper functions */

  435.     /**
  436.      * Translator object for unescaping escaped XSI Value entries.
  437.      *
  438.      * While {@link #unescapeXSI(String)}  is the expected method of use, this
  439.      * object allows the XSI unescaping functionality to be used
  440.      * as the foundation for a custom translator.
  441.      */
    // Backed by the nested XsiUnescaper, which strips escaping backslashes from the input.
    public static final CharSequenceTranslator UNESCAPE_XSI = new XsiUnescaper();

  443.     /**
  444.      * Gets a {@link Builder}.
  445.      * @param translator the text translator
  446.      * @return {@link Builder}
  447.      */
    public static StringEscapeUtils.Builder builder(final CharSequenceTranslator translator) {
        // Builder's constructor is private, so this factory is how a Builder is obtained.
        return new Builder(translator);
    }

  451.     /**
  452.      * Returns a {@code String} value for a CSV column enclosed in double quotes,
  453.      * if required.
  454.      *
  455.      * <p>If the value contains a comma, newline or double quote, then the
  456.      *    String value is returned enclosed in double quotes.</p>
  457.      *
  458.      * <p>Any double quote characters in the value are escaped with another double quote.</p>
  459.      *
  460.      * <p>If the value does not contain a comma, newline or double quote, then the
  461.      *    String value is returned unchanged.</p>
  462.      *
  463.      * See <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
  464.      * <a href="https://tools.ietf.org/html/rfc4180">RFC 4180</a>.
  465.      *
  466.      * @param input the input CSV column String, may be null
  467.      * @return The input String, enclosed in double quotes if the value contains a comma,
  468.      * newline or double quote, {@code null} if null string input
  469.      */
    public static String escapeCsv(final String input) {
        // Thin wrapper: all the work is done by the shared ESCAPE_CSV translator.
        return ESCAPE_CSV.translate(input);
    }

  473.     /**
  474.      * Escapes the characters in a {@code String} using EcmaScript String rules.
  475.      *
  476.      * <p>Escapes any values it finds into their EcmaScript String form.
  477.      * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
  478.      *
  479.      * <p>So a tab becomes the characters {@code '\\'} and
  480.      * {@code 't'}.</p>
  481.      *
  482.      * <p>The only difference between Java strings and EcmaScript strings
  483.      * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p>
  484.      *
  485.      * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects.</p>
  486.      *
  487.      * <p>Example:</p>
  488.      * <pre>
  489.      * input string: He didn't say, "Stop!"
  490.      * output string: He didn\'t say, \"Stop!\"
  491.      * </pre>
  492.      *
  493.      * <strong>Security Note.</strong> We only provide backslash escaping in this method. For example, {@code '\"'} has the output
  494.      * {@code '\\\"'} which could result in potential issues in the case where the string being escaped is being used
  495.      * in an HTML tag like {@code <select onmouseover="..." />}. If you wish to have more rigorous string escaping, you
  496.      * may consider the
  497.      * <a href="https://www.owasp.org/index.php/Category:OWASP_Enterprise_Security_API_JAVA">ESAPI Libraries</a>.
  498.      * Further, you can view the <a href="https://github.com/esapi">ESAPI GitHub Org</a>.
  499.      *
  500.      * @param input  String to escape values in, may be null
  501.      * @return String with escaped values, {@code null} if null string input
  502.      */
    public static String escapeEcmaScript(final String input) {
        // Thin wrapper: all the work is done by the shared ESCAPE_ECMASCRIPT translator.
        return ESCAPE_ECMASCRIPT.translate(input);
    }

  506.     /**
  507.      * Escapes the characters in a {@code String} using HTML entities.
  508.      *
  509.      * <p>Supports only the HTML 3.0 entities.</p>
  510.      *
  511.      * @param input  the {@code String} to escape, may be null
  512.      * @return a new escaped {@code String}, {@code null} if null string input
  513.      */
    public static String escapeHtml3(final String input) {
        // Thin wrapper: all the work is done by the shared ESCAPE_HTML3 translator.
        return ESCAPE_HTML3.translate(input);
    }

  517.     // HTML and XML
  518.     /**
  519.      * Escapes the characters in a {@code String} using HTML entities.
  520.      *
  521.      * <p>
  522.      * For example:
  523.      * </p>
  524.      * <p>{@code "bread" &amp; "butter"}</p>
  525.      * becomes:
  526.      * <p>
  527.      * {@code &quot;bread&quot; &amp;amp; &quot;butter&quot;}.
  528.      * </p>
  529.      *
  530.      * <p>Supports all known HTML 4.0 entities, including funky accents.
  531.      * Note that the commonly used apostrophe escape character (&amp;apos;)
  532.      * is not a legal entity and so is not supported.</p>
  533.      *
  534.      * @param input  the {@code String} to escape, may be null
  535.      * @return a new escaped {@code String}, {@code null} if null string input
  536.      * @see <a href="https://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
  537.      * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
  538.      * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
  539.      * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
  540.      * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
  541.      */
    public static String escapeHtml4(final String input) {
        // Thin wrapper: all the work is done by the shared ESCAPE_HTML4 translator.
        return ESCAPE_HTML4.translate(input);
    }

  545.     // Java and JavaScript
  546.     /**
  547.      * Escapes the characters in a {@code String} using Java String rules.
  548.      *
  549.      * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
  550.      *
  551.      * <p>So a tab becomes the characters {@code '\\'} and
  552.      * {@code 't'}.</p>
  553.      *
  554.      * <p>The only difference between Java strings and JavaScript strings
  555.      * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p>
  556.      *
  557.      * <p>Example:</p>
  558.      * <pre>
  559.      * input string: He didn't say, "Stop!"
  560.      * output string: He didn't say, \"Stop!\"
  561.      * </pre>
  562.      *
  563.      * @param input  String to escape values in, may be null
  564.      * @return String with escaped values, {@code null} if null string input
  565.      */
    public static String escapeJava(final String input) {
        // Thin wrapper: all the work is done by the shared ESCAPE_JAVA translator.
        return ESCAPE_JAVA.translate(input);
    }

  569.     /**
  570.      * Escapes the characters in a {@code String} using Json String rules.
  571.      *
  572.      * <p>Escapes any values it finds into their Json String form.
  573.      * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
  574.      *
  575.      * <p>So a tab becomes the characters {@code '\\'} and
  576.      * {@code 't'}.</p>
  577.      *
  578.      * <p>The only difference between Java strings and Json strings
  579.      * is that in Json, forward-slash (/) is escaped.</p>
  580.      *
  581.      * <p>See http://www.ietf.org/rfc/rfc4627.txt for further details.</p>
  582.      *
  583.      * <p>Example:</p>
  584.      * <pre>
  585.      * input string: He didn't say, "Stop!"
  586.      * output string: He didn't say, \"Stop!\"
  587.      * </pre>
  588.      *
  589.      * @param input  String to escape values in, may be null
  590.      * @return String with escaped values, {@code null} if null string input
  591.      */
    public static String escapeJson(final String input) {
        // Thin wrapper: all the work is done by the shared ESCAPE_JSON translator.
        return ESCAPE_JSON.translate(input);
    }

  595.     /**
  596.      * Escapes the characters in a {@code String} using XML entities.
  597.      *
  598.      * <p>For example: {@code "bread" & "butter"} =&gt;
  599.      * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
  600.      * </p>
  601.      *
  602.      * <p>Note that XML 1.0 is a text-only format: it cannot represent control
  603.      * characters or unpaired Unicode surrogate code points, even after escaping.
  604.      * {@code escapeXml10} will remove characters that do not fit in the
  605.      * following ranges:</p>
  606.      *
  607.      * <p>{@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
  608.      *
  609.      * <p>Though not strictly necessary, {@code escapeXml10} will escape
  610.      * characters in the following ranges:</p>
  611.      *
  612.      * <p>{@code [#x7F-#x84] | [#x86-#x9F]}</p>
  613.      *
  614.      * <p>The returned string can be inserted into a valid XML 1.0 or XML 1.1
  615.      * document. If you want to allow more non-text characters in an XML 1.1
  616.      * document, use {@link #escapeXml11(String)}.</p>
  617.      *
  618.      * @param input  the {@code String} to escape, may be null
  619.      * @return a new escaped {@code String}, {@code null} if null string input
  620.      * @see #unescapeXml(String)
  621.      */
    public static String escapeXml10(final String input) {
        // Thin wrapper: all the work is done by the shared ESCAPE_XML10 translator.
        return ESCAPE_XML10.translate(input);
    }

  625.     /**
  626.      * Escapes the characters in a {@code String} using XML entities, for use
  627.      * in an XML 1.1 document.
  628.      *
  629.      * <p>For example: {@code "bread" & "butter"} =&gt;
  630.      * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.</p>
  631.      *
  632.      * <p>XML 1.1 is able to represent certain control characters, but even after
  633.      * escaping it cannot represent the null byte or unpaired Unicode surrogate
  634.      * code points. {@code escapeXml11} removes every character that does not fit
  635.      * in the following ranges:</p>
  636.      *
  637.      * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
  638.      *
  639.      * <p>{@code escapeXml11} escapes characters in the following ranges:</p>
  640.      *
  641.      * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p>
  642.      *
  643.      * <p>The result can be inserted into a valid XML 1.1 document; do not use it for XML 1.0 documents.</p>
  644.      *
  645.      * @param input  the {@code String} to escape, may be null
  646.      * @return a new escaped {@code String}, {@code null} if null string input
  647.      * @see #unescapeXml(String)
  648.      */
  649.     public static String escapeXml11(final String input) {
  650.         final String escaped = ESCAPE_XML11.translate(input);
  651.         return escaped;
  652.     }

  653.     /**
  654.      * Escapes the characters in a {@code String} using XSI rules.
  655.      *
  656.      * <p><strong>Beware!</strong> In most cases you should not escape shell commands at all, but instead use
  657.      * the multi-argument methods provided by {@link ProcessBuilder} or {@link Runtime#exec(String[])}.</p>
  658.      *
  659.      * <p>Example:</p>
  660.      * <pre>
  661.      * input string: He didn't say, "Stop!"
  662.      * output string: He\ didn\'t\ say,\ \"Stop!\"
  663.      * </pre>
  664.      *
  665.      * @param input  String to escape values in, may be null
  666.      * @return String with escaped values, {@code null} if null string input
  667.      * @see <a href="https://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a>
  668.      */
  669.     public static String escapeXSI(final String input) {
  670.         final String escaped = ESCAPE_XSI.translate(input);
  671.         return escaped;
  672.     }

  673.     /**
  674.      * Returns a {@code String} value for an unescaped CSV column.
  675.      *
  676.      * <p>If the value is enclosed in double quotes, and contains a comma, newline
  677.      * or double quote, then the enclosing quotes are removed.</p>
  678.      *
  679.      * <p>Any double quote escaped characters (a pair of double quotes) are unescaped
  680.      * to just one double quote.</p>
  681.      *
  682.      * <p>If the value is not enclosed in double quotes, or is enclosed but does not
  683.      * contain a comma, newline or double quote, then the value is returned unchanged.</p>
  684.      *
  685.      * <p>See <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
  686.      * <a href="https://tools.ietf.org/html/rfc4180">RFC 4180</a>.</p>
  687.      *
  688.      * @param input the input CSV column String, may be null
  689.      * @return the input String, with enclosing double quotes removed and embedded double
  690.      * quotes unescaped, {@code null} if null string input
  691.      */
  692.     public static String unescapeCsv(final String input) {
  693.         final String unescaped = UNESCAPE_CSV.translate(input);
  694.         return unescaped;
  695.     }

  696.     /**
  697.      * Unescapes any EcmaScript literals found in the {@code String}.
  698.      *
  699.      * <p>For example, a {@code '\'} followed by an {@code 'n'} is turned into a newline
  700.      * character, unless that {@code '\'} is itself preceded by another {@code '\'}.</p>
  701.      *
  702.      * @param input  the {@code String} to unescape, may be null
  703.      * @return a new unescaped {@code String}, {@code null} if null string input
  704.      * @see #unescapeJava(String)
  705.      */
  706.     public static String unescapeEcmaScript(final String input) {
  707.         final String unescaped = UNESCAPE_ECMASCRIPT.translate(input);
  708.         return unescaped;
  709.     }

  710.     /**
  711.      * Unescapes a string containing entity escapes to a string containing the
  712.      * actual Unicode characters corresponding to the escapes. Supports only HTML 3.0 entities.
  713.      *
  714.      * @param input  the {@code String} to unescape, may be null
  715.      * @return a new unescaped {@code String}, {@code null} if null string input
  716.      */
  717.     public static String unescapeHtml3(final String input) {
  718.         final String unescaped = UNESCAPE_HTML3.translate(input);
  719.         return unescaped;
  720.     }

  721.     /**
  722.      * Unescapes a string containing entity escapes to a string containing the
  723.      * actual Unicode characters corresponding to the escapes. Supports HTML 4.0
  724.      * entities.
  725.      *
  726.      * <p>For example, the string {@code "&lt;Fran&ccedil;ais&gt;"}
  727.      * will become {@code "<Français>"}.</p>
  728.      *
  729.      * <p>An unrecognized entity is left alone and inserted verbatim into the
  730.      * result string, e.g. {@code "&gt;&zzzz;x"} will become {@code ">&zzzz;x"}.</p>
  731.      *
  732.      * @param input  the {@code String} to unescape, may be null
  733.      * @return a new unescaped {@code String}, {@code null} if null string input
  734.      */
  735.     public static String unescapeHtml4(final String input) {
  736.         final String unescaped = UNESCAPE_HTML4.translate(input);
  737.         return unescaped;
  738.     }

  739.     /**
  740.      * Unescapes any Java literals found in the {@code String}. For example, a
  741.      * {@code '\'} followed by an {@code 'n'} is turned into a newline character,
  742.      * unless that {@code '\'} is itself preceded by another {@code '\'}.
  743.      *
  744.      * @param input  the {@code String} to unescape, may be null
  745.      * @return a new unescaped {@code String}, {@code null} if null string input
  746.      */
  747.     public static String unescapeJava(final String input) {
  748.         final String unescaped = UNESCAPE_JAVA.translate(input);
  749.         return unescaped;
  750.     }

  751.     /**
  752.      * Unescapes any JSON literals found in the {@code String}.
  753.      *
  754.      * <p>For example, a {@code '\'} followed by an {@code 'n'} is turned into a newline
  755.      * character, unless that {@code '\'} is itself preceded by another {@code '\'}.</p>
  756.      *
  757.      * @param input  the {@code String} to unescape, may be null
  758.      * @return a new unescaped {@code String}, {@code null} if null string input
  759.      * @see #unescapeJava(String)
  760.      */
  761.     public static String unescapeJson(final String input) {
  762.         final String unescaped = UNESCAPE_JSON.translate(input);
  763.         return unescaped;
  764.     }

  765.     /**
  766.      * Unescapes a string containing XML entity escapes to a string containing
  767.      * the actual Unicode characters corresponding to the escapes.
  768.      *
  769.      * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
  770.      * Does not support DTDs or external entities.</p>
  771.      *
  772.      * <p>Note that numerical \\u Unicode codes are unescaped to their respective
  773.      * Unicode characters. This may change in future releases.</p>
  774.      *
  775.      * @param input  the {@code String} to unescape, may be null
  776.      * @return a new unescaped {@code String}, {@code null} if null string input
  777.      * @see #escapeXml10(String)
  778.      * @see #escapeXml11(String)
  779.      */
  780.     public static String unescapeXml(final String input) {
  781.         final String unescaped = UNESCAPE_XML.translate(input);
  782.         return unescaped;
  783.     }

  784.     /**
  785.      * Unescapes the characters in a {@code String} using XSI rules (see {@link #escapeXSI(String)}).
  786.      *
  787.      * @param input  the {@code String} to unescape, may be null
  788.      * @return a new unescaped {@code String}, {@code null} if null string input
  789.      */
  790.     public static String unescapeXSI(final String input) {
  791.         final String unescaped = UNESCAPE_XSI.translate(input);
  792.         return unescaped;
  793.     }

  794.     /**
  795.      * Creates a new instance. {@code StringEscapeUtils} instances should NOT be
  796.      * constructed in standard programming.
  797.      *
  798.      * <p>Instead, the class should be used as:</p>
  799.      * <pre>StringEscapeUtils.escapeJava("foo");</pre>
  800.      *
  801.      * <p>This constructor is public to permit tools that require a JavaBean
  802.      * instance to operate. It performs no initialization of its own.</p>
  803.      */
  804.     public StringEscapeUtils() {
  805.     }

  806. }