StringEscapeUtils.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.text;

  18. import org.apache.commons.lang3.CharUtils;
  19. import org.apache.commons.lang3.StringUtils;
  20. import org.apache.commons.text.translate.AggregateTranslator;
  21. import org.apache.commons.text.translate.CharSequenceTranslator;
  22. import org.apache.commons.text.translate.EntityArrays;
  23. import org.apache.commons.text.translate.JavaUnicodeEscaper;
  24. import org.apache.commons.text.translate.LookupTranslator;
  25. import org.apache.commons.text.translate.NumericEntityEscaper;
  26. import org.apache.commons.text.translate.NumericEntityUnescaper;
  27. import org.apache.commons.text.translate.OctalUnescaper;
  28. import org.apache.commons.text.translate.SingleLookupTranslator;
  29. import org.apache.commons.text.translate.UnicodeUnescaper;
  30. import org.apache.commons.text.translate.UnicodeUnpairedSurrogateRemover;

  31. import java.io.IOException;
  32. import java.io.Writer;

  33. /**
  34.  * <p>Escapes and unescapes {@code String}s for
  35.  * Java, Java Script, HTML and XML.</p>
  36.  *
  37.  * <p>#ThreadSafe#</p>
  38.  *
  39.  *
  40.  * <p>
  41.  * This code has been adapted from Apache Commons Lang 3.5.
  42.  * </p>
  43.  *
  44.  * @since 1.0
  45.  */
  46. public class StringEscapeUtils {

  47.     /* ESCAPE TRANSLATORS */

  48.     /**
  49.      * Translator object for escaping Java.
  50.      *
  51.      * While {@link #escapeJava(String)} is the expected method of use, this
  52.      * object allows the Java escaping functionality to be used
  53.      * as the foundation for a custom translator.
  54.      */
  55.     public static final CharSequenceTranslator ESCAPE_JAVA =
  56.             new LookupTranslator(
  57.                     new String[][] {
  58.                             {"\"", "\\\""},
  59.                             {"\\", "\\\\"},
  60.                     }).with(
  61.                     new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE())
  62.             ).with(
  63.                     JavaUnicodeEscaper.outsideOf(32, 0x7f)
  64.             );

  65.     /**
  66.      * Translator object for escaping EcmaScript/JavaScript.
  67.      *
  68.      * While {@link #escapeEcmaScript(String)} is the expected method of use, this
  69.      * object allows the EcmaScript escaping functionality to be used
  70.      * as the foundation for a custom translator.
  71.      */
  72.     public static final CharSequenceTranslator ESCAPE_ECMASCRIPT =
  73.             new AggregateTranslator(
  74.                     new LookupTranslator(
  75.                             new String[][] {
  76.                                     {"'", "\\'"},
  77.                                     {"\"", "\\\""},
  78.                                     {"\\", "\\\\"},
  79.                                     {"/", "\\/"}
  80.                             }),
  81.                     new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
  82.                     JavaUnicodeEscaper.outsideOf(32, 0x7f)
  83.             );

  84.     /**
  85.      * Translator object for escaping Json.
  86.      *
  87.      * While {@link #escapeJson(String)} is the expected method of use, this
  88.      * object allows the Json escaping functionality to be used
  89.      * as the foundation for a custom translator.
  90.      */
  91.     public static final CharSequenceTranslator ESCAPE_JSON =
  92.             new AggregateTranslator(
  93.                     new LookupTranslator(
  94.                             new String[][] {
  95.                                     {"\"", "\\\""},
  96.                                     {"\\", "\\\\"},
  97.                                     {"/", "\\/"}
  98.                             }),
  99.                     new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
  100.                     JavaUnicodeEscaper.outsideOf(32, 0x7f)
  101.             );

  102.     /**
  103.      * Translator object for escaping XML 1.0.
  104.      *
  105.      * While {@link #escapeXml10(String)} is the expected method of use, this
  106.      * object allows the XML escaping functionality to be used
  107.      * as the foundation for a custom translator.
  108.      */
  109.     public static final CharSequenceTranslator ESCAPE_XML10 =
  110.             new AggregateTranslator(
  111.                     new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
  112.                     new LookupTranslator(EntityArrays.APOS_ESCAPE()),
  113.                     new LookupTranslator(
  114.                             new String[][] {
  115.                                     { "\u0000", StringUtils.EMPTY },
  116.                                     { "\u0001", StringUtils.EMPTY },
  117.                                     { "\u0002", StringUtils.EMPTY },
  118.                                     { "\u0003", StringUtils.EMPTY },
  119.                                     { "\u0004", StringUtils.EMPTY },
  120.                                     { "\u0005", StringUtils.EMPTY },
  121.                                     { "\u0006", StringUtils.EMPTY },
  122.                                     { "\u0007", StringUtils.EMPTY },
  123.                                     { "\u0008", StringUtils.EMPTY },
  124.                                     { "\u000b", StringUtils.EMPTY },
  125.                                     { "\u000c", StringUtils.EMPTY },
  126.                                     { "\u000e", StringUtils.EMPTY },
  127.                                     { "\u000f", StringUtils.EMPTY },
  128.                                     { "\u0010", StringUtils.EMPTY },
  129.                                     { "\u0011", StringUtils.EMPTY },
  130.                                     { "\u0012", StringUtils.EMPTY },
  131.                                     { "\u0013", StringUtils.EMPTY },
  132.                                     { "\u0014", StringUtils.EMPTY },
  133.                                     { "\u0015", StringUtils.EMPTY },
  134.                                     { "\u0016", StringUtils.EMPTY },
  135.                                     { "\u0017", StringUtils.EMPTY },
  136.                                     { "\u0018", StringUtils.EMPTY },
  137.                                     { "\u0019", StringUtils.EMPTY },
  138.                                     { "\u001a", StringUtils.EMPTY },
  139.                                     { "\u001b", StringUtils.EMPTY },
  140.                                     { "\u001c", StringUtils.EMPTY },
  141.                                     { "\u001d", StringUtils.EMPTY },
  142.                                     { "\u001e", StringUtils.EMPTY },
  143.                                     { "\u001f", StringUtils.EMPTY },
  144.                                     { "\ufffe", StringUtils.EMPTY },
  145.                                     { "\uffff", StringUtils.EMPTY }
  146.                             }),
  147.                     NumericEntityEscaper.between(0x7f, 0x84),
  148.                     NumericEntityEscaper.between(0x86, 0x9f),
  149.                     new UnicodeUnpairedSurrogateRemover()
  150.             );

  151.     /**
  152.      * Translator object for escaping XML 1.1.
  153.      *
  154.      * While {@link #escapeXml11(String)} is the expected method of use, this
  155.      * object allows the XML escaping functionality to be used
  156.      * as the foundation for a custom translator.
  157.      */
  158.     public static final CharSequenceTranslator ESCAPE_XML11 =
  159.             new AggregateTranslator(
  160.                     new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
  161.                     new LookupTranslator(EntityArrays.APOS_ESCAPE()),
  162.                     new LookupTranslator(
  163.                             new String[][] {
  164.                                     { "\u0000", StringUtils.EMPTY },
  165.                                     { "\u000b", "&#11;" },
  166.                                     { "\u000c", "&#12;" },
  167.                                     { "\ufffe", StringUtils.EMPTY },
  168.                                     { "\uffff", StringUtils.EMPTY }
  169.                             }),
  170.                     NumericEntityEscaper.between(0x1, 0x8),
  171.                     NumericEntityEscaper.between(0xe, 0x1f),
  172.                     NumericEntityEscaper.between(0x7f, 0x84),
  173.                     NumericEntityEscaper.between(0x86, 0x9f),
  174.                     new UnicodeUnpairedSurrogateRemover()
  175.             );

  176.     /**
  177.      * Translator object for escaping HTML version 3.0.
  178.      *
  179.      * While {@link #escapeHtml3(String)} is the expected method of use, this
  180.      * object allows the HTML escaping functionality to be used
  181.      * as the foundation for a custom translator.
  182.      */
  183.     public static final CharSequenceTranslator ESCAPE_HTML3 =
  184.             new AggregateTranslator(
  185.                     new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
  186.                     new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE())
  187.             );

  188.     /**
  189.      * The improved translator object for escaping HTML version 3.0.
  190.      * The 'improved' part of this translator is that it checks if the html is already translated.
  191.      * This check prevents double, triple, or recursive translations.
  192.      *
  193.      * While {@link #escapeHtml3Once(String)} is the expected method of use, this
  194.      * object allows the HTML escaping functionality to be used
  195.      * as the foundation for a custom translator.
  196.      *
  197.      * Note that, multiple lookup tables should be passed to this translator
  198.      * instead of passing multiple instances of this translator to the
  199.      * AggregateTranslator. Because, a SingleLookupTranslator only checks the values of the
  200.      * lookup table passed to that instance while deciding whether a value is
  201.      * already translated or not.
  202.      */
  203.     public static final CharSequenceTranslator ESCAPE_HTML3_ONCE =
  204.             new SingleLookupTranslator(EntityArrays.BASIC_ESCAPE(), EntityArrays.ISO8859_1_ESCAPE());


  205.     /**
  206.      * Translator object for escaping HTML version 4.0.
  207.      *
  208.      * While {@link #escapeHtml4(String)} is the expected method of use, this
  209.      * object allows the HTML escaping functionality to be used
  210.      * as the foundation for a custom translator.
  211.      */
  212.     public static final CharSequenceTranslator ESCAPE_HTML4 =
  213.             new AggregateTranslator(
  214.                     new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
  215.                     new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()),
  216.                     new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE())
  217.             );

  218.     /**
  219.      * The improved translator object for escaping HTML version 4.0.
  220.      * The 'improved' part of this translator is that it checks if the html is already translated.
  221.      * This check prevents double, triple, or recursive translations.
  222.      *
  223.      * While {@link #escapeHtml4Once(String)} is the expected method of use, this
  224.      * object allows the HTML escaping functionality to be used
  225.      * as the foundation for a custom translator.
  226.      *
  227.      * Note that, multiple lookup tables should be passed to this translator
  228.      * instead of passing multiple instances of this translator to the
  229.      * AggregateTranslator. Because, a SingleLookupTranslator only checks the values of the
  230.      * lookup table passed to that instance while deciding whether a value is
  231.      * already translated or not.
  232.      */
  233.     public static final CharSequenceTranslator ESCAPE_HTML4_ONCE =
  234.             new SingleLookupTranslator(EntityArrays.BASIC_ESCAPE(), EntityArrays.ISO8859_1_ESCAPE(), EntityArrays.HTML40_EXTENDED_ESCAPE());

  235.     /**
  236.      * Translator object for escaping individual Comma Separated Values.
  237.      *
  238.      * While {@link #escapeCsv(String)} is the expected method of use, this
  239.      * object allows the CSV escaping functionality to be used
  240.      * as the foundation for a custom translator.
  241.      */
  242.     public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper();

  243.     // TODO: Create a parent class - 'SinglePassTranslator' ?
  244.     //       It would handle the index checking + length returning,
  245.     //       and could also have an optimization check method.
  246.     static class CsvEscaper extends CharSequenceTranslator {

  247.         private static final char CSV_DELIMITER = ',';
  248.         private static final char CSV_QUOTE = '"';
  249.         private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
  250.         private static final char[] CSV_SEARCH_CHARS =
  251.                 new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};

  252.         @Override
  253.         public int translate(final CharSequence input, final int index, final Writer out) throws IOException {

  254.             if(index != 0) {
  255.                 throw new IllegalStateException("CsvEscaper should never reach the [1] index");
  256.             }

  257.             if (StringUtils.containsNone(input.toString(), CSV_SEARCH_CHARS)) {
  258.                 out.write(input.toString());
  259.             } else {
  260.                 out.write(CSV_QUOTE);
  261.                 out.write(StringUtils.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR));
  262.                 out.write(CSV_QUOTE);
  263.             }
  264.             return Character.codePointCount(input, 0, input.length());
  265.         }
  266.     }

  267.     /**
  268.      * Translator object for escaping Shell command language.
  269.      *
  270.      * @see <a href="http://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a>
  271.      */
  272.     public static final CharSequenceTranslator ESCAPE_XSI =
  273.         new LookupTranslator(
  274.             new String[][] {
  275.                     {"|", "\\|"},
  276.                     {"&", "\\&"},
  277.                     {";", "\\;"},
  278.                     {"<", "\\<"},
  279.                     {">", "\\>"},
  280.                     {"(", "\\("},
  281.                     {")", "\\)"},
  282.                     {"$", "\\$"},
  283.                     {"`", "\\`"},
  284.                     {"\\", "\\\\"},
  285.                     {"\"", "\\\""},
  286.                     {"'", "\\'"},
  287.                     {" ", "\\ "},
  288.                     {"\t", "\\\t"},
  289.                     {"\r\n", ""},
  290.                     {"\n", ""},
  291.                     {"*", "\\*"},
  292.                     {"?", "\\?"},
  293.                     {"[", "\\["},
  294.                     {"#", "\\#"},
  295.                     {"~", "\\~"},
  296.                     {"=", "\\="},
  297.                     {"%", "\\%"},
  298.             });

  299.     /* UNESCAPE TRANSLATORS */

  300.     /**
  301.      * Translator object for unescaping escaped Java.
  302.      *
  303.      * While {@link #unescapeJava(String)} is the expected method of use, this
  304.      * object allows the Java unescaping functionality to be used
  305.      * as the foundation for a custom translator.
  306.      */
  307.     // TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)?
  308.     public static final CharSequenceTranslator UNESCAPE_JAVA =
  309.             new AggregateTranslator(
  310.                     new OctalUnescaper(),     // .between('\1', '\377'),
  311.                     new UnicodeUnescaper(),
  312.                     new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()),
  313.                     new LookupTranslator(
  314.                             new String[][] {
  315.                                     {"\\\\", "\\"},
  316.                                     {"\\\"", "\""},
  317.                                     {"\\'", "'"},
  318.                                     {"\\", ""}
  319.                             })
  320.             );

  321.     /**
  322.      * Translator object for unescaping escaped EcmaScript.
  323.      *
  324.      * While {@link #unescapeEcmaScript(String)} is the expected method of use, this
  325.      * object allows the EcmaScript unescaping functionality to be used
  326.      * as the foundation for a custom translator.
  327.      */
  328.     public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA;

  329.     /**
  330.      * Translator object for unescaping escaped Json.
  331.      *
  332.      * While {@link #unescapeJson(String)} is the expected method of use, this
  333.      * object allows the Json unescaping functionality to be used
  334.      * as the foundation for a custom translator.
  335.      */
  336.     public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA;

  337.     /**
  338.      * Translator object for unescaping escaped HTML 3.0.
  339.      *
  340.      * While {@link #unescapeHtml3(String)} is the expected method of use, this
  341.      * object allows the HTML unescaping functionality to be used
  342.      * as the foundation for a custom translator.
  343.      */
  344.     public static final CharSequenceTranslator UNESCAPE_HTML3 =
  345.             new AggregateTranslator(
  346.                     new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
  347.                     new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
  348.                     new NumericEntityUnescaper()
  349.             );

  350.     /**
  351.      * Translator object for unescaping escaped HTML 4.0.
  352.      *
  353.      * While {@link #unescapeHtml4(String)} is the expected method of use, this
  354.      * object allows the HTML unescaping functionality to be used
  355.      * as the foundation for a custom translator.
  356.      */
  357.     public static final CharSequenceTranslator UNESCAPE_HTML4 =
  358.             new AggregateTranslator(
  359.                     new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
  360.                     new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
  361.                     new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()),
  362.                     new NumericEntityUnescaper()
  363.             );

  364.     /**
  365.      * Translator object for unescaping escaped XML.
  366.      *
  367.      * While {@link #unescapeXml(String)} is the expected method of use, this
  368.      * object allows the XML unescaping functionality to be used
  369.      * as the foundation for a custom translator.
  370.      */
  371.     public static final CharSequenceTranslator UNESCAPE_XML =
  372.             new AggregateTranslator(
  373.                     new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
  374.                     new LookupTranslator(EntityArrays.APOS_UNESCAPE()),
  375.                     new NumericEntityUnescaper()
  376.             );

  377.     /**
  378.      * Translator object for unescaping escaped Comma Separated Value entries.
  379.      *
  380.      * While {@link #unescapeCsv(String)} is the expected method of use, this
  381.      * object allows the CSV unescaping functionality to be used
  382.      * as the foundation for a custom translator.
  383.      */
  384.     public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper();

  385.     static class CsvUnescaper extends CharSequenceTranslator {

  386.         private static final char CSV_DELIMITER = ',';
  387.         private static final char CSV_QUOTE = '"';
  388.         private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
  389.         private static final char[] CSV_SEARCH_CHARS =
  390.                 new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};

  391.         @Override
  392.         public int translate(final CharSequence input, final int index, final Writer out) throws IOException {

  393.             if(index != 0) {
  394.                 throw new IllegalStateException("CsvUnescaper should never reach the [1] index");
  395.             }

  396.             if ( input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE ) {
  397.                 out.write(input.toString());
  398.                 return Character.codePointCount(input, 0, input.length());
  399.             }

  400.             // strip quotes
  401.             final String quoteless = input.subSequence(1, input.length() - 1).toString();

  402.             if ( StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS) ) {
  403.                 // deal with escaped quotes; ie) ""
  404.                 out.write(StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR));
  405.             } else {
  406.                 out.write(input.toString());
  407.             }
  408.             return Character.codePointCount(input, 0, input.length());
  409.         }
  410.     }

  411.     public static final CharSequenceTranslator UNESCAPE_XSI = new XsiUnescaper();

  412.     /**
  413.      * Translator object for unescaping backslash escaped entries.
  414.      */
  415.     static class XsiUnescaper extends CharSequenceTranslator {

  416.         private static final char BACKSLASH = '\\';

  417.         @Override
  418.         public int translate(final CharSequence input, final int index, final Writer out) throws IOException {

  419.             if(index != 0) {
  420.                 throw new IllegalStateException("XsiUnescaper should never reach the [1] index");
  421.             }

  422.             String s = input.toString();

  423.             int segmentStart = 0;
  424.             int searchOffset = 0;
  425.             while (true) {
  426.                 int pos = s.indexOf(BACKSLASH, searchOffset);
  427.                 if (pos == -1) {
  428.                     if (segmentStart < s.length()) {
  429.                         out.write(s.substring(segmentStart));
  430.                     }
  431.                     break;
  432.                 }
  433.                 if (pos > segmentStart) {
  434.                     out.write(s.substring(segmentStart, pos));
  435.                 }
  436.                 segmentStart = pos + 1;
  437.                 searchOffset = pos + 2;
  438.             }

  439.             return Character.codePointCount(input, 0, input.length());
  440.         }
  441.     }

  442.     /* Helper functions */

  443.     /**
  444.      * <p>{@code StringEscapeUtils} instances should NOT be constructed in
  445.      * standard programming.</p>
  446.      *
  447.      * <p>Instead, the class should be used as:</p>
  448.      * <pre>StringEscapeUtils.escapeJava("foo");</pre>
  449.      *
  450.      * <p>This constructor is public to permit tools that require a JavaBean
  451.      * instance to operate.</p>
  452.      */
  453.     public StringEscapeUtils() {
  454.         super();
  455.     }

  456.     /**
  457.      * <p>Convenience wrapper for {@link java.lang.StringBuilder} providing escape methods.</p>
  458.      *
  459.      * <p>Example:</p>
  460.      * <pre>
  461.      * new Builder(ESCAPE_HTML4)
  462.      *      .append("&lt;p&gt;")
  463.      *      .escape("This is paragraph 1 and special chars like &amp; get escaped.")
  464.      *      .append("&lt;/p&gt;&lt;p&gt;")
  465.      *      .escape("This is paragraph 2 &amp; more...")
  466.      *      .append("&lt;/p&gt;")
  467.      *      .toString()
  468.      * </pre>
  469.      *
  470.      */
  471.     public static class Builder {

  472.         private final StringBuilder sb;
  473.         private final CharSequenceTranslator translator;

  474.         private Builder(final CharSequenceTranslator translator) {
  475.             this.sb = new StringBuilder();
  476.             this.translator = translator;
  477.         }

  478.         /**
  479.          * <p>Escape {@code input} according to the given {@link CharSequenceTranslator}.</p>
  480.          *
  481.          * @param input the String to escape
  482.          * @return {@code this}, to enable chaining
  483.          */
  484.         public Builder escape(final String input) {
  485.             sb.append(translator.translate(input));
  486.             return this;
  487.         }

  488.         /**
  489.          * Literal append, no escaping being done.
  490.          *
  491.          * @param input the String to append
  492.          * @return {@code this}, to enable chaining
  493.          */
  494.         public Builder append(final String input) {
  495.             sb.append(input);
  496.             return this;
  497.         }

  498.         /**
  499.          * <p>Return the escaped string.</p>
  500.          *
  501.          * @return the escaped string
  502.          */
  503.         @Override
  504.         public String toString() {
  505.             return sb.toString();
  506.         }
  507.     }

  508.     /**
  509.      * Get a {@link Builder}.
  510.      * @param translator the text translator
  511.      * @return {@link Builder}
  512.      */
  513.     public static StringEscapeUtils.Builder builder(final CharSequenceTranslator translator) {
  514.         return new Builder(translator);
  515.     }

  516.     // Java and JavaScript
  517.     //--------------------------------------------------------------------------
  518.     /**
  519.      * <p>Escapes the characters in a {@code String} using Java String rules.</p>
  520.      *
  521.      * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
  522.      *
  523.      * <p>So a tab becomes the characters {@code '\\'} and
  524.      * {@code 't'}.</p>
  525.      *
  526.      * <p>The only difference between Java strings and JavaScript strings
  527.      * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p>
  528.      *
  529.      * <p>Example:</p>
  530.      * <pre>
  531.      * input string: He didn't say, "Stop!"
  532.      * output string: He didn't say, \"Stop!\"
  533.      * </pre>
  534.      *
  535.      * @param input  String to escape values in, may be null
  536.      * @return String with escaped values, {@code null} if null string input
  537.      */
  538.     public static final String escapeJava(final String input) {
  539.         return ESCAPE_JAVA.translate(input);
  540.     }

  541.     /**
  542.      * <p>Escapes the characters in a {@code String} using EcmaScript String rules.</p>
  543.      * <p>Escapes any values it finds into their EcmaScript String form.
  544.      * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
  545.      *
  546.      * <p>So a tab becomes the characters {@code '\\'} and
  547.      * {@code 't'}.</p>
  548.      *
  549.      * <p>The only difference between Java strings and EcmaScript strings
  550.      * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p>
  551.      *
  552.      * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects. </p>
  553.      *
  554.      * <p>Example:</p>
  555.      * <pre>
  556.      * input string: He didn't say, "Stop!"
  557.      * output string: He didn\'t say, \"Stop!\"
  558.      * </pre>
  559.      *
  560.      * <b>Security Note.</b> We only provide backslash escaping in this method. For example, {@code '\"'} has the output
  561.      * {@code '\\\"'} which could result in potential issues in the case where the string being escaped is being used
  562.      * in an HTML tag like {@code <select onmouseover="..." />}. If you wish to have more rigorous string escaping, you
  563.      * may consider the
  564.      * <a href="https://www.owasp.org/index.php/Category:OWASP_Enterprise_Security_API_JAVA">ESAPI Libraries</a>. Further,
  565.      * you can view the
  566.      * <a href="https://github.com/esapi">ESAPI GitHub Org</a>.
  567.      *
  568.      * @param input  String to escape values in, may be null
  569.      * @return String with escaped values, {@code null} if null string input
  570.      */
  571.     public static final String escapeEcmaScript(final String input) {
  572.         return ESCAPE_ECMASCRIPT.translate(input);
  573.     }

  574.     /**
  575.      * <p>Escapes the characters in a {@code String} using Json String rules.</p>
  576.      * <p>Escapes any values it finds into their Json String form.
  577.      * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
  578.      *
  579.      * <p>So a tab becomes the characters {@code '\\'} and
  580.      * {@code 't'}.</p>
  581.      *
  582.      * <p>The only difference between Java strings and Json strings
  583.      * is that in Json, forward-slash (/) is escaped.</p>
  584.      *
  585.      * <p>See http://www.ietf.org/rfc/rfc4627.txt for further details. </p>
  586.      *
  587.      * <p>Example:</p>
  588.      * <pre>
  589.      * input string: He didn't say, "Stop!"
  590.      * output string: He didn't say, \"Stop!\"
  591.      * </pre>
  592.      *
  593.      * @param input  String to escape values in, may be null
  594.      * @return String with escaped values, {@code null} if null string input
  595.      */
  596.     public static final String escapeJson(final String input) {
  597.         return ESCAPE_JSON.translate(input);
  598.     }

  599.     /**
  600.      * <p>Unescapes any Java literals found in the {@code String}.
  601.      * For example, it will turn a sequence of {@code '\'} and
  602.      * {@code 'n'} into a newline character, unless the {@code '\'}
  603.      * is preceded by another {@code '\'}.</p>
  604.      *
  605.      * @param input  the {@code String} to unescape, may be null
  606.      * @return a new unescaped {@code String}, {@code null} if null string input
  607.      */
  608.     public static final String unescapeJava(final String input) {
  609.         return UNESCAPE_JAVA.translate(input);
  610.     }

  611.     /**
  612.      * <p>Unescapes any EcmaScript literals found in the {@code String}.</p>
  613.      *
  614.      * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
  615.      * into a newline character, unless the {@code '\'} is preceded by another
  616.      * {@code '\'}.</p>
  617.      *
  618.      * @param input  the {@code String} to unescape, may be null
  619.      * @return a new unescaped {@code String}, {@code null} if null string input
  620.      * @see #unescapeJava(String)
  621.      */
  622.     public static final String unescapeEcmaScript(final String input) {
  623.         return UNESCAPE_ECMASCRIPT.translate(input);
  624.     }

  625.     /**
  626.      * <p>Unescapes any JSON literals found in the {@code String}.</p>
  627.      *
  628.      * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
  629.      * into a newline character, unless the {@code '\'} is preceded by another
  630.      * {@code '\'}.</p>
  631.      *
  632.      * @param input  the {@code String} to unescape, may be null
  633.      * @return a new unescaped {@code String}, {@code null} if null string input
  634.      * @see #unescapeJava(String)
  635.      */
  636.     public static final String unescapeJson(final String input) {
  637.         return UNESCAPE_JSON.translate(input);
  638.     }

  639.     // HTML and XML
  640.     //--------------------------------------------------------------------------
  641.     /**
  642.      * <p>Escapes the characters in a {@code String} using HTML entities.</p>
  643.      *
  644.      * <p>
  645.      * For example:
  646.      * </p>
  647.      * <p><code>"bread" &amp; "butter"</code></p>
  648.      * <p>becomes:</p>
  649.      * <p>
  650.      * <code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code>.
  651.      * </p>
  652.      *
  653.      * <p>Supports all known HTML 4.0 entities, including accented characters.
  654.      * Note that the commonly used apostrophe escape character (&amp;apos;)
  655.      * is not a legal entity and so is not supported.</p>
  656.      *
  657.      * @param input  the {@code String} to escape, may be null
  658.      * @return a new escaped {@code String}, {@code null} if null string input
  659.      *
  660.      * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
  661.      * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
  662.      * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
  663.      * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
  664.      * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
  665.      */
  666.     public static final String escapeHtml4(final String input) {
  667.         return ESCAPE_HTML4.translate(input);
  668.     }

  669.     /**
  670.      * <p>Escapes the characters in a {@code String} using HTML entities,
  671.      * but escapes them only once, i.e. does not escape already escaped characters.</p>
  672.      *
  673.      * <p>
  674.      * For example:
  675.      * </p>
  676.      * <p><code>"bread" &amp; "butter"</code></p>
  677.      * <p>becomes:</p>
  678.      * <p>
  679.      * <code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code>.
  680.      * </p>
  681.      *
  682.      * <p>
  683.      * But:
  684.      * </p>
  685.      * <p><code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code></p>
  686.      * <p>remains unaffected.</p>
  687.      *
  688.      * <p>Supports all known HTML 4.0 entities, including accented characters.
  689.      * Note that the commonly used apostrophe escape character (&amp;apos;)
  690.      * is not a legal entity and so is not supported.</p>
  691.      *
  692.      * @param input  the {@code String} to escape, may be null
  693.      * @return a new escaped {@code String}, {@code null} if null string input
  694.      *
  695.      * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
  696.      * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
  697.      * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
  698.      * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
  699.      * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
  700.      */
  701.     public static final String escapeHtml4Once(final String input) {
  702.         return ESCAPE_HTML4_ONCE.translate(input);
  703.     }


  704.     /**
  705.      * <p>Escapes the characters in a {@code String} using HTML entities.</p>
  706.      * <p>Supports only the HTML 3.0 entities; see {@link #escapeHtml4(String)} for HTML 4.0.</p>
  707.      *
  708.      * @param input  the {@code String} to escape, may be null
  709.      * @return a new escaped {@code String}, {@code null} if null string input
  710.      */
  711.     public static final String escapeHtml3(final String input) {
  712.         return ESCAPE_HTML3.translate(input);
  713.     }

  714.     /**
  715.      * <p>Escapes the characters in a {@code String} using HTML entities,
  716.      * but escapes them only once, i.e. does not escape already escaped characters.</p>
  717.      * <p>Supports only the HTML 3.0 entities; see {@link #escapeHtml4Once(String)} for HTML 4.0.</p>
  718.      *
  719.      * @param input  the {@code String} to escape, may be null
  720.      * @return a new escaped {@code String}, {@code null} if null string input
  721.      */
  722.     public static final String escapeHtml3Once(final String input) {
  723.         return ESCAPE_HTML3_ONCE.translate(input);
  724.     }

  725.     //-----------------------------------------------------------------------
  726.     /**
  727.      * <p>Unescapes a string containing entity escapes to a string
  728.      * containing the actual Unicode characters corresponding to the
  729.      * escapes. Supports HTML 4.0 entities.</p>
  730.      *
  731.      * <p>For example, the string {@code "&lt;Fran&ccedil;ais&gt;"}
  732.      * will become {@code "<Français>"}.</p>
  733.      *
  734.      * <p>If an entity is unrecognized, it is left alone and inserted
  735.      * verbatim into the result string, e.g. {@code "&gt;&zzzz;x"} will
  736.      * become {@code ">&zzzz;x"}.</p>
  737.      *
  738.      * @param input  the {@code String} to unescape, may be null
  739.      * @return a new unescaped {@code String}, {@code null} if null string input
  740.      */
  741.     public static final String unescapeHtml4(final String input) {
  742.         return UNESCAPE_HTML4.translate(input);
  743.     }

  744.     /**
  745.      * <p>Unescapes a string containing entity escapes to a string
  746.      * containing the actual Unicode characters corresponding to the
  747.      * escapes. Supports only HTML 3.0 entities; see {@link #unescapeHtml4(String)} for HTML 4.0.</p>
  748.      *
  749.      * @param input  the {@code String} to unescape, may be null
  750.      * @return a new unescaped {@code String}, {@code null} if null string input
  751.      */
  752.     public static final String unescapeHtml3(final String input) {
  753.         return UNESCAPE_HTML3.translate(input);
  754.     }

  755.     /**
  756.      * <p>Escapes the characters in a {@code String} using XML entities.</p>
  757.      *
  758.      * <p>For example, {@code "bread" & "butter"} becomes
  759.      * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
  760.      * </p>
  761.      *
  762.      * <p>Note that XML 1.0 is a text-only format: it cannot represent control
  763.      * characters or unpaired Unicode surrogate codepoints, even after escaping.
  764.      * {@code escapeXml10} will remove characters that do not fit in the
  765.      * following ranges:</p>
  766.      *
  767.      * <p>{@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
  768.      *
  769.      * <p>Though not strictly necessary, {@code escapeXml10} will escape
  770.      * characters in the following ranges:</p>
  771.      *
  772.      * <p>{@code [#x7F-#x84] | [#x86-#x9F]}</p>
  773.      *
  774.      * <p>The returned string can be inserted into a valid XML 1.0 or XML 1.1
  775.      * document. If you want to allow more non-text characters in an XML 1.1
  776.      * document, use {@link #escapeXml11(String)}.</p>
  777.      *
  778.      * @param input  the {@code String} to escape, may be null
  779.      * @return a new escaped {@code String}, {@code null} if null string input
  780.      * @see #unescapeXml(String)
  781.      */
  782.     public static String escapeXml10(final String input) {
  783.         return ESCAPE_XML10.translate(input);
  784.     }

  785.     /**
  786.      * <p>Escapes the characters in a {@code String} using XML entities.</p>
  787.      *
  788.      * <p>For example, {@code "bread" & "butter"} becomes
  789.      * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
  790.      * </p>
  791.      *
  792.      * <p>XML 1.1 can represent certain control characters, but it cannot represent
  793.      * the null byte or unpaired Unicode surrogate codepoints, even after escaping.
  794.      * {@code escapeXml11} will remove characters that do not fit in the following
  795.      * ranges:</p>
  796.      *
  797.      * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
  798.      *
  799.      * <p>{@code escapeXml11} will escape characters in the following ranges:</p>
  800.      *
  801.      * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p>
  802.      *
  803.      * <p>The returned string can be inserted into a valid XML 1.1 document. Do not
  804.      * use it for XML 1.0 documents.</p>
  805.      *
  806.      * @param input  the {@code String} to escape, may be null
  807.      * @return a new escaped {@code String}, {@code null} if null string input
  808.      * @see #unescapeXml(String)
  809.      */
  810.     public static String escapeXml11(final String input) {
  811.         return ESCAPE_XML11.translate(input);
  812.     }

  813.     //-----------------------------------------------------------------------
  814.     /**
  815.      * <p>Unescapes a string containing XML entity escapes to a string
  816.      * containing the actual Unicode characters corresponding to the
  817.      * escapes.</p>
  818.      *
  819.      * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
  820.      * Does not support DTDs or external entities.</p>
  821.      *
  822.      * <p>Note that numerical \\u Unicode codes are unescaped to their respective
  823.      * Unicode characters. This may change in future releases.</p>
  824.      *
  825.      * @param input  the {@code String} to unescape, may be null
  826.      * @return a new unescaped {@code String}, {@code null} if null string input
  827.      * @see #escapeXml10(String)
  828.      * @see #escapeXml11(String)
  829.      */
  830.     public static final String unescapeXml(final String input) {
  831.         return UNESCAPE_XML.translate(input);
  832.     }

  833.     //-----------------------------------------------------------------------

  834.     /**
  835.      * <p>Returns a {@code String} value for a CSV column enclosed in double quotes,
  836.      * if required.</p>
  837.      *
  838.      * <p>If the value contains a comma, newline or double quote, then the
  839.      * String value is returned enclosed in double quotes.</p>
  840.      *
  841.      * <p>Any double quote characters in the value are escaped with another double quote.</p>
  842.      *
  843.      * <p>If the value does not contain a comma, newline or double quote, then the
  844.      * String value is returned unchanged.</p>
  845.      *
  846.      * <p>See <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
  847.      * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.</p>
  848.      *
  849.      * @param input the input CSV column String, may be null
  850.      * @return the input String, enclosed in double quotes if the value contains a comma,
  851.      * newline or double quote, {@code null} if null string input
  852.      */
  853.     public static final String escapeCsv(final String input) {
  854.         return ESCAPE_CSV.translate(input);
  855.     }

  856.     /**
  857.      * <p>Returns a {@code String} value for an unescaped CSV column.</p>
  858.      *
  859.      * <p>If the value is enclosed in double quotes, and contains a comma, newline
  860.      * or double quote, then the enclosing quotes are removed.
  861.      * </p>
  862.      *
  863.      * <p>Any double quote escaped characters (a pair of double quotes) are unescaped
  864.      * to just one double quote.</p>
  865.      *
  866.      * <p>If the value is not enclosed in double quotes, or is enclosed but does not contain a
  867.      * comma, newline or double quote, then the String value is returned unchanged.</p>
  868.      *
  869.      * <p>See <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
  870.      * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.</p>
  871.      *
  872.      * @param input the input CSV column String, may be null
  873.      * @return the input String, with enclosing double quotes removed and embedded double
  874.      * quotes unescaped, {@code null} if null string input
  875.      */
  876.     public static final String unescapeCsv(final String input) {
  877.         return UNESCAPE_CSV.translate(input);
  878.     }

  879.     /**
  880.      * <p>Escapes the characters in a {@code String} using XSI rules.</p>
  881.      *
  882.      * <p><b>Beware!</b> In most cases you don't want to escape shell commands but use multi-argument
  883.      * methods provided by {@link java.lang.ProcessBuilder} or {@link java.lang.Runtime#exec(String[])}
  884.      * instead.</p>
  885.      *
  886.      * <p>Example:</p>
  887.      * <pre>
  888.      * input string: He didn't say, "Stop!"
  889.      * output string: He\ didn\'t\ say,\ \"Stop!\"
  890.      * </pre>
  891.      *
  892.      * @param input  String to escape values in, may be null
  893.      * @return String with escaped values, {@code null} if null string input
  894.      * @see <a href="http://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a>
  895.      */
  896.     public static final String escapeXSI(final String input) {
  897.         return ESCAPE_XSI.translate(input);
  898.     }

  899.     /**
  900.      * <p>Unescapes the characters in a {@code String} using XSI rules.</p>
  901.      *
  902.      * @param input  the {@code String} to unescape, may be null
  903.      * @return a new unescaped {@code String}, {@code null} if null string input
  904.      * @see #escapeXSI(String)
  905.      */
  906.     public static final String unescapeXSI(final String input) {
  907.         return UNESCAPE_XSI.translate(input);
  908.     }

  909. }