View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.text;
18  
19  import java.io.IOException;
20  import java.io.Writer;
21  import java.util.Collections;
22  import java.util.HashMap;
23  import java.util.Map;
24  
25  import org.apache.commons.lang3.StringUtils;
26  import org.apache.commons.text.translate.AggregateTranslator;
27  import org.apache.commons.text.translate.CharSequenceTranslator;
28  import org.apache.commons.text.translate.CsvTranslators;
29  import org.apache.commons.text.translate.EntityArrays;
30  import org.apache.commons.text.translate.JavaUnicodeEscaper;
31  import org.apache.commons.text.translate.LookupTranslator;
32  import org.apache.commons.text.translate.NumericEntityEscaper;
33  import org.apache.commons.text.translate.NumericEntityUnescaper;
34  import org.apache.commons.text.translate.OctalUnescaper;
35  import org.apache.commons.text.translate.UnicodeUnescaper;
36  import org.apache.commons.text.translate.UnicodeUnpairedSurrogateRemover;
37  
38  /**
39   * <p>
 * Escapes and unescapes {@code String}s for Java, JavaScript (EcmaScript), JSON, HTML, XML, CSV and XSI (shell).
41   * </p>
42   *
43   * <p>
44   * #ThreadSafe#
45   * </p>
46   *
47   * <p>
48   * This code has been adapted from Apache Commons Lang 3.5.
49   * </p>
50   *
51   * @since 1.0
52   */
53  public class StringEscapeUtils {
54  
55      /* ESCAPE TRANSLATORS */
56  
57      /**
58       * Convenience wrapper for {@link StringBuilder} providing escape methods.
59       *
60       * <p>Example:</p>
61       * <pre>
62       * new Builder(ESCAPE_HTML4)
63       *      .append("&lt;p&gt;")
64       *      .escape("This is paragraph 1 and special chars like &amp; get escaped.")
65       *      .append("&lt;/p&gt;&lt;p&gt;")
66       *      .escape("This is paragraph 2 &amp; more...")
67       *      .append("&lt;/p&gt;")
68       *      .toString()
69       * </pre>
70       */
71      public static final class Builder {
72  
73          /**
74           * StringBuilder to be used in the Builder class.
75           */
76          private final StringBuilder sb;
77  
78          /**
79           * CharSequenceTranslator to be used in the Builder class.
80           */
81          private final CharSequenceTranslator translator;
82  
83          /**
84           * Builder constructor.
85           *
86           * @param translator a CharSequenceTranslator.
87           */
88          private Builder(final CharSequenceTranslator translator) {
89              this.sb = new StringBuilder();
90              this.translator = translator;
91          }
92  
93          /**
94           * Literal append, no escaping being done.
95           *
96           * @param input the String to append
97           * @return {@code this}, to enable chaining
98           */
99          public Builder append(final String input) {
100             sb.append(input);
101             return this;
102         }
103 
104         /**
105          * Escape {@code input} according to the given {@link CharSequenceTranslator}.
106          *
107          * @param input the String to escape
108          * @return {@code this}, to enable chaining
109          */
110         public Builder escape(final String input) {
111             sb.append(translator.translate(input));
112             return this;
113         }
114 
115         /**
116          * Return the escaped string.
117          *
118          * @return The escaped string
119          */
120         @Override
121         public String toString() {
122             return sb.toString();
123         }
124     }
125     /**
126      * Translator object for unescaping backslash escaped entries.
127      */
128     static class XsiUnescaper extends CharSequenceTranslator {
129 
130         /**
131          * Escaped backslash constant.
132          */
133         private static final char BACKSLASH = '\\';
134 
135         @Override
136         public int translate(final CharSequence input, final int index, final Writer writer) throws IOException {
137 
138             if (index != 0) {
139                 throw new IllegalStateException("XsiUnescaper should never reach the [1] index");
140             }
141 
142             final String s = input.toString();
143 
144             int segmentStart = 0;
145             int searchOffset = 0;
146             while (true) {
147                 final int pos = s.indexOf(BACKSLASH, searchOffset);
148                 if (pos == -1) {
149                     if (segmentStart < s.length()) {
150                         writer.write(s.substring(segmentStart));
151                     }
152                     break;
153                 }
154                 if (pos > segmentStart) {
155                     writer.write(s.substring(segmentStart, pos));
156                 }
157                 segmentStart = pos + 1;
158                 searchOffset = pos + 2;
159             }
160 
161             return Character.codePointCount(input, 0, input.length());
162         }
163     }
164 
165     /**
166      * Translator object for escaping Java.
167      *
168      * While {@link #escapeJava(String)} is the expected method of use, this object allows the Java escaping functionality to be used as the foundation for a
169      * custom translator.
170      */
171     public static final CharSequenceTranslator ESCAPE_JAVA;
172     static {
173         final Map<CharSequence, CharSequence> escapeJavaMap = new HashMap<>();
174         escapeJavaMap.put("\"", "\\\"");
175         escapeJavaMap.put("\\", "\\\\");
176         ESCAPE_JAVA = new AggregateTranslator(
177                 new LookupTranslator(Collections.unmodifiableMap(escapeJavaMap)),
178                 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE),
179                 JavaUnicodeEscaper.outsideOf(32, 0x7f)
180         );
181     }
182 
183     /**
184      * Translator object for escaping EcmaScript/JavaScript.
185      *
186      * While {@link #escapeEcmaScript(String)} is the expected method of use, this object allows the EcmaScript escaping functionality to be used as the
187      * foundation for a custom translator.
188      */
189     public static final CharSequenceTranslator ESCAPE_ECMASCRIPT;
190     static {
191         final Map<CharSequence, CharSequence> escapeEcmaScriptMap = new HashMap<>();
192         escapeEcmaScriptMap.put("'", "\\'");
193         escapeEcmaScriptMap.put("\"", "\\\"");
194         escapeEcmaScriptMap.put("\\", "\\\\");
195         escapeEcmaScriptMap.put("/", "\\/");
196         ESCAPE_ECMASCRIPT = new AggregateTranslator(
197                 new LookupTranslator(Collections.unmodifiableMap(escapeEcmaScriptMap)),
198                 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE),
199                 JavaUnicodeEscaper.outsideOf(32, 0x7f)
200         );
201     }
202 
203     /**
204      * Translator object for escaping Json.
205      *
206      * While {@link #escapeJson(String)} is the expected method of use, this object allows the Json escaping functionality to be used as the foundation for a
207      * custom translator.
208      */
209     public static final CharSequenceTranslator ESCAPE_JSON;
210     static {
211         final Map<CharSequence, CharSequence> escapeJsonMap = new HashMap<>();
212         escapeJsonMap.put("\"", "\\\"");
213         escapeJsonMap.put("\\", "\\\\");
214         escapeJsonMap.put("/", "\\/");
215         ESCAPE_JSON = new AggregateTranslator(
216                 new LookupTranslator(Collections.unmodifiableMap(escapeJsonMap)),
217                 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE),
218                 JavaUnicodeEscaper.outsideOf(32, 0x7e)
219         );
220     }
221 
222     /**
223      * Translator object for escaping XML 1.0.
224      *
225      * While {@link #escapeXml10(String)} is the expected method of use, this object allows the XML escaping functionality to be used as the foundation for a
226      * custom translator.
227      */
228     public static final CharSequenceTranslator ESCAPE_XML10;
229     static {
230         final Map<CharSequence, CharSequence> escapeXml10Map = new HashMap<>();
231         escapeXml10Map.put("\u0000", StringUtils.EMPTY);
232         escapeXml10Map.put("\u0001", StringUtils.EMPTY);
233         escapeXml10Map.put("\u0002", StringUtils.EMPTY);
234         escapeXml10Map.put("\u0003", StringUtils.EMPTY);
235         escapeXml10Map.put("\u0004", StringUtils.EMPTY);
236         escapeXml10Map.put("\u0005", StringUtils.EMPTY);
237         escapeXml10Map.put("\u0006", StringUtils.EMPTY);
238         escapeXml10Map.put("\u0007", StringUtils.EMPTY);
239         escapeXml10Map.put("\u0008", StringUtils.EMPTY);
240         escapeXml10Map.put("\u000b", StringUtils.EMPTY);
241         escapeXml10Map.put("\u000c", StringUtils.EMPTY);
242         escapeXml10Map.put("\u000e", StringUtils.EMPTY);
243         escapeXml10Map.put("\u000f", StringUtils.EMPTY);
244         escapeXml10Map.put("\u0010", StringUtils.EMPTY);
245         escapeXml10Map.put("\u0011", StringUtils.EMPTY);
246         escapeXml10Map.put("\u0012", StringUtils.EMPTY);
247         escapeXml10Map.put("\u0013", StringUtils.EMPTY);
248         escapeXml10Map.put("\u0014", StringUtils.EMPTY);
249         escapeXml10Map.put("\u0015", StringUtils.EMPTY);
250         escapeXml10Map.put("\u0016", StringUtils.EMPTY);
251         escapeXml10Map.put("\u0017", StringUtils.EMPTY);
252         escapeXml10Map.put("\u0018", StringUtils.EMPTY);
253         escapeXml10Map.put("\u0019", StringUtils.EMPTY);
254         escapeXml10Map.put("\u001a", StringUtils.EMPTY);
255         escapeXml10Map.put("\u001b", StringUtils.EMPTY);
256         escapeXml10Map.put("\u001c", StringUtils.EMPTY);
257         escapeXml10Map.put("\u001d", StringUtils.EMPTY);
258         escapeXml10Map.put("\u001e", StringUtils.EMPTY);
259         escapeXml10Map.put("\u001f", StringUtils.EMPTY);
260         escapeXml10Map.put("\ufffe", StringUtils.EMPTY);
261         escapeXml10Map.put("\uffff", StringUtils.EMPTY);
262         ESCAPE_XML10 = new AggregateTranslator(
263                 new LookupTranslator(EntityArrays.BASIC_ESCAPE),
264                 new LookupTranslator(EntityArrays.APOS_ESCAPE),
265                 new LookupTranslator(Collections.unmodifiableMap(escapeXml10Map)),
266                 NumericEntityEscaper.between(0x7f, 0x84),
267                 NumericEntityEscaper.between(0x86, 0x9f),
268                 new UnicodeUnpairedSurrogateRemover()
269         );
270     }
271 
272     /**
273      * Translator object for escaping XML 1.1.
274      *
275      * While {@link #escapeXml11(String)} is the expected method of use, this
276      * object allows the XML escaping functionality to be used
277      * as the foundation for a custom translator.
278      */
279     public static final CharSequenceTranslator ESCAPE_XML11;
280 
281     static {
282         final Map<CharSequence, CharSequence> escapeXml11Map = new HashMap<>();
283         escapeXml11Map.put("\u0000", StringUtils.EMPTY);
284         escapeXml11Map.put("\u000b", "&#11;");
285         escapeXml11Map.put("\u000c", "&#12;");
286         escapeXml11Map.put("\ufffe", StringUtils.EMPTY);
287         escapeXml11Map.put("\uffff", StringUtils.EMPTY);
288         ESCAPE_XML11 = new AggregateTranslator(
289                 new LookupTranslator(EntityArrays.BASIC_ESCAPE),
290                 new LookupTranslator(EntityArrays.APOS_ESCAPE),
291                 new LookupTranslator(Collections.unmodifiableMap(escapeXml11Map)),
292                 NumericEntityEscaper.between(0x1, 0x8),
293                 NumericEntityEscaper.between(0xe, 0x1f),
294                 NumericEntityEscaper.between(0x7f, 0x84),
295                 NumericEntityEscaper.between(0x86, 0x9f),
296                 new UnicodeUnpairedSurrogateRemover()
297         );
298     }
299 
300     /**
301      * Translator object for escaping HTML version 3.0.
302      *
303      * While {@link #escapeHtml3(String)} is the expected method of use, this
304      * object allows the HTML escaping functionality to be used
305      * as the foundation for a custom translator.
306      */
307     public static final CharSequenceTranslator ESCAPE_HTML3 =
308             new AggregateTranslator(
309                     new LookupTranslator(EntityArrays.BASIC_ESCAPE),
310                     new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE)
311             );
312 
313     /**
314      * Translator object for escaping HTML version 4.0.
315      *
316      * While {@link #escapeHtml4(String)} is the expected method of use, this
317      * object allows the HTML escaping functionality to be used
318      * as the foundation for a custom translator.
319      */
320     public static final CharSequenceTranslator ESCAPE_HTML4 =
321             new AggregateTranslator(
322                     new LookupTranslator(EntityArrays.BASIC_ESCAPE),
323                     new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE),
324                     new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE)
325             );
326     /**
327      * Translator object for escaping individual Comma Separated Values.
328      *
329      * While {@link #escapeCsv(String)} is the expected method of use, this
330      * object allows the CSV escaping functionality to be used
331      * as the foundation for a custom translator.
332      */
333     public static final CharSequenceTranslator ESCAPE_CSV = new CsvTranslators.CsvEscaper();
334 
    /* XSI ESCAPE TRANSLATOR, THEN UNESCAPE TRANSLATORS */
336 
337     /**
338      * Translator object for escaping Shell command language.
339      *
340      * @see <a href="https://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a>
341      */
342     public static final CharSequenceTranslator ESCAPE_XSI;
343     static {
344         final Map<CharSequence, CharSequence> escapeXsiMap = new HashMap<>();
345         escapeXsiMap.put("|", "\\|");
346         escapeXsiMap.put("&", "\\&");
347         escapeXsiMap.put(";", "\\;");
348         escapeXsiMap.put("<", "\\<");
349         escapeXsiMap.put(">", "\\>");
350         escapeXsiMap.put("(", "\\(");
351         escapeXsiMap.put(")", "\\)");
352         escapeXsiMap.put("$", "\\$");
353         escapeXsiMap.put("`", "\\`");
354         escapeXsiMap.put("\\", "\\\\");
355         escapeXsiMap.put("\"", "\\\"");
356         escapeXsiMap.put("'", "\\'");
357         escapeXsiMap.put(" ", "\\ ");
358         escapeXsiMap.put("\t", "\\\t");
359         escapeXsiMap.put("\r\n", StringUtils.EMPTY);
360         escapeXsiMap.put("\n", StringUtils.EMPTY);
361         escapeXsiMap.put("*", "\\*");
362         escapeXsiMap.put("?", "\\?");
363         escapeXsiMap.put("[", "\\[");
364         escapeXsiMap.put("#", "\\#");
365         escapeXsiMap.put("~", "\\~");
366         escapeXsiMap.put("=", "\\=");
367         escapeXsiMap.put("%", "\\%");
368         ESCAPE_XSI = new LookupTranslator(
369                 Collections.unmodifiableMap(escapeXsiMap)
370         );
371     }
372 
373     /**
374      * Translator object for unescaping escaped Java.
375      *
376      * While {@link #unescapeJava(String)} is the expected method of use, this
377      * object allows the Java unescaping functionality to be used
378      * as the foundation for a custom translator.
379      */
380     public static final CharSequenceTranslator UNESCAPE_JAVA;
381 
382     static {
383         final Map<CharSequence, CharSequence> unescapeJavaMap = new HashMap<>();
384         unescapeJavaMap.put("\\\\", "\\");
385         unescapeJavaMap.put("\\\"", "\"");
386         unescapeJavaMap.put("\\'", "'");
387         unescapeJavaMap.put("\\", StringUtils.EMPTY);
388         UNESCAPE_JAVA = new AggregateTranslator(
389                 new OctalUnescaper(),     // .between('\1', '\377'),
390                 new UnicodeUnescaper(),
391                 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE),
392                 new LookupTranslator(Collections.unmodifiableMap(unescapeJavaMap))
393         );
394     }
395 
396     /**
397      * Translator object for unescaping escaped EcmaScript.
398      *
399      * While {@link #unescapeEcmaScript(String)} is the expected method of use, this
400      * object allows the EcmaScript unescaping functionality to be used
401      * as the foundation for a custom translator.
402      */
403     public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA;
404 
405     /**
406      * Translator object for unescaping escaped Json.
407      *
408      * While {@link #unescapeJson(String)} is the expected method of use, this
409      * object allows the Json unescaping functionality to be used
410      * as the foundation for a custom translator.
411      */
412     public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA;
413 
414     /**
415      * Translator object for unescaping escaped HTML 3.0.
416      *
417      * While {@link #unescapeHtml3(String)} is the expected method of use, this
418      * object allows the HTML unescaping functionality to be used
419      * as the foundation for a custom translator.
420      */
421     public static final CharSequenceTranslator UNESCAPE_HTML3 =
422             new AggregateTranslator(
423                     new LookupTranslator(EntityArrays.BASIC_UNESCAPE),
424                     new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE),
425                     new NumericEntityUnescaper()
426             );
427 
428     /**
429      * Translator object for unescaping escaped HTML 4.0.
430      *
431      * While {@link #unescapeHtml4(String)} is the expected method of use, this
432      * object allows the HTML unescaping functionality to be used
433      * as the foundation for a custom translator.
434      */
435     public static final CharSequenceTranslator UNESCAPE_HTML4 =
436             new AggregateTranslator(
437                     new LookupTranslator(EntityArrays.BASIC_UNESCAPE),
438                     new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE),
439                     new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE),
440                     new NumericEntityUnescaper()
441             );
442 
443     /**
444      * Translator object for unescaping escaped XML.
445      *
446      * While {@link #unescapeXml(String)} is the expected method of use, this
447      * object allows the XML unescaping functionality to be used
448      * as the foundation for a custom translator.
449      */
450     public static final CharSequenceTranslator UNESCAPE_XML =
451             new AggregateTranslator(
452                     new LookupTranslator(EntityArrays.BASIC_UNESCAPE),
453                     new LookupTranslator(EntityArrays.APOS_UNESCAPE),
454                     new NumericEntityUnescaper()
455             );
456 
457     /**
458      * Translator object for unescaping escaped Comma Separated Value entries.
459      *
460      * While {@link #unescapeCsv(String)} is the expected method of use, this
461      * object allows the CSV unescaping functionality to be used
462      * as the foundation for a custom translator.
463      */
464     public static final CharSequenceTranslator UNESCAPE_CSV = new CsvTranslators.CsvUnescaper();
465 
    /* XSI UNESCAPE TRANSLATOR, THEN HELPER FUNCTIONS */
467 
468     /**
469      * Translator object for unescaping escaped XSI Value entries.
470      *
471      * While {@link #unescapeXSI(String)}  is the expected method of use, this
472      * object allows the XSI unescaping functionality to be used
473      * as the foundation for a custom translator.
474      */
475     public static final CharSequenceTranslator UNESCAPE_XSI = new XsiUnescaper();
476 
477     /**
478      * Gets a {@link Builder}.
479      *
480      * @param translator the text translator.
481      * @return {@link Builder}
482      */
483     public static StringEscapeUtils.Builder builder(final CharSequenceTranslator translator) {
484         return new Builder(translator);
485     }
486 
487     /**
488      * Returns a {@code String} value for a CSV column enclosed in double quotes, if required.
489      *
490      * <p>
491      * If the value contains a comma, newline or double quote, then the String value is returned enclosed in double quotes.
492      * </p>
493      *
494      * <p>
495      * Any double quote characters in the value are escaped with another double quote.
496      * </p>
497      *
498      * <p>
499      * If the value does not contain a comma, newline or double quote, then the String value is returned unchanged.
500      * </p>
501      * <p>
502      * See <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and <a href="https://tools.ietf.org/html/rfc4180">RFC 4180</a>.
503      * </p>
504      *
505      * @param input the input CSV column String, may be null.
506      * @return The input String, enclosed in double quotes if the value contains a comma, newline or double quote, {@code null} if null string input.
507      */
508     public static String escapeCsv(final String input) {
509         return ESCAPE_CSV.translate(input);
510     }
511 
512     /**
513      * Escapes the characters in a {@code String} using EcmaScript String rules.
514      *
515      * <p>
516      * Escapes any values it finds into their EcmaScript String form. Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.)
517      * </p>
518      *
519      * <p>
520      * So a tab becomes the characters {@code '\\'} and {@code 't'}.
521      * </p>
522      *
523      * <p>
524      * The only difference between Java strings and EcmaScript strings is that in EcmaScript, a single quote and forward-slash (/) are escaped.
525      * </p>
526      *
527      * <p>
528      * Note that EcmaScript is best known by the JavaScript and ActionScript dialects.
529      * </p>
530      *
531      * <p>
532      * Example:
533      * </p>
534      *
535      * <pre>
536      * input string: He didn't say, "Stop!"
537      * output string: He didn\'t say, \"Stop!\"
538      * </pre>
539      * <p>
540      * <strong>Security Note.</strong> We only provide backslash escaping in this method. For example, {@code '\"'} has the output {@code '\\\"'} which could
541      * result in potential issues in the case where the string being escaped is being used in an HTML tag like {@code <select onmouseover="..." />}. If you wish
542      * to have more rigorous string escaping, you may consider the <a href="https://www.owasp.org/index.php/Category:OWASP_Enterprise_Security_API_JAVA">ESAPI
543      * Libraries</a>. Further, you can view the <a href="https://github.com/esapi">ESAPI GitHub Org</a>.
544      * </p>
545      *
546      * @param input String to escape values in, may be null.
547      * @return String with escaped values, {@code null} if null string input.
548      */
549     public static String escapeEcmaScript(final String input) {
550         return ESCAPE_ECMASCRIPT.translate(input);
551     }
552 
553     /**
554      * Escapes the characters in a {@code String} using HTML entities.
555      *
556      * <p>Supports only the HTML 3.0 entities.</p>
557      *
558      * @param input  the {@code String} to escape, may be null.
559      * @return a new escaped {@code String}, {@code null} if null string input.
560      */
561     public static String escapeHtml3(final String input) {
562         return ESCAPE_HTML3.translate(input);
563     }
564 
565     // HTML and XML
566     /**
567      * Escapes the characters in a {@code String} using HTML entities.
568      *
569      * <p>
570      * For example:
571      * </p>
572      * <p>{@code "bread" &amp; "butter"}</p>
573      * becomes:
574      * <p>
575      * {@code &quot;bread&quot; &amp;amp; &quot;butter&quot;}.
576      * </p>
577      *
578      * <p>Supports all known HTML 4.0 entities, including funky accents.
579      * Note that the commonly used apostrophe escape character (&amp;apos;)
580      * is not a legal entity and so is not supported).</p>
581      *
582      * @param input  the {@code String} to escape, may be null.
583      * @return a new escaped {@code String}, {@code null} if null string input.
584      * @see <a href="https://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
585      * @see <a href="https://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
586      * @see <a href="https://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
587      * @see <a href="https://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
588      * @see <a href="https://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
589      */
590     public static String escapeHtml4(final String input) {
591         return ESCAPE_HTML4.translate(input);
592     }
593 
594     // Java and JavaScript
595     /**
596      * Escapes the characters in a {@code String} using Java String rules.
597      *
598      * <p>
599      * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.)
600      * </p>
601      *
602      * <p>
603      * So a tab becomes the characters {@code '\\'} and {@code 't'}.
604      * </p>
605      *
606      * <p>
607      * The only difference between Java strings and JavaScript strings is that in JavaScript, a single quote and forward-slash (/) are escaped.
608      * </p>
609      *
610      * <p>
611      * Example:
612      * </p>
613      *
614      * <pre>
615      * input string: He didn't say, "Stop!"
616      * output string: He didn't say, \"Stop!\"
617      * </pre>
618      *
619      * @param input String to escape values in, may be null.
620      * @return String with escaped values, {@code null} if null string input.
621      */
622     public static String escapeJava(final String input) {
623         return ESCAPE_JAVA.translate(input);
624     }
625 
626     /**
627      * Escapes the characters in a {@code String} using Json String rules.
628      *
629      * <p>
630      * Escapes any values it finds into their Json String form. Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.)
631      * </p>
632      *
633      * <p>
634      * So a tab becomes the characters {@code '\\'} and {@code 't'}.
635      * </p>
636      *
637      * <p>
638      * The only difference between Java strings and Json strings is that in Json, forward-slash (/) is escaped.
639      * </p>
640      *
641      * <p>
642      * See http://www.ietf.org/rfc/rfc4627.txt for further details.
643      * </p>
644      *
645      * <p>
646      * Example:
647      * </p>
648      *
649      * <pre>
650      * input string: He didn't say, "Stop!"
651      * output string: He didn't say, \"Stop!\"
652      * </pre>
653      *
654      * @param input String to escape values in, may be null.
655      * @return String with escaped values, {@code null} if null string input.
656      */
657     public static String escapeJson(final String input) {
658         return ESCAPE_JSON.translate(input);
659     }
660 
661     /**
662      * Escapes the characters in a {@code String} using XML entities.
663      *
664      * <p>
665      * For example: {@code "bread" & "butter"} =&gt; {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
666      * </p>
667      *
668      * <p>
669      * Note that XML 1.0 is a text-only format: it cannot represent control characters or unpaired Unicode surrogate code points, even after escaping.
670      * {@code escapeXml10} will remove characters that do not fit in the following ranges:
671      * </p>
672      *
673      * <p>
674      * {@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}
675      * </p>
676      *
677      * <p>
678      * Though not strictly necessary, {@code escapeXml10} will escape characters in the following ranges:
679      * </p>
680      *
681      * <p>
682      * {@code [#x7F-#x84] | [#x86-#x9F]}
683      * </p>
684      *
685      * <p>
686      * The returned string can be inserted into a valid XML 1.0 or XML 1.1 document. If you want to allow more non-text characters in an XML 1.1 document, use
687      * {@link #escapeXml11(String)}.
688      * </p>
689      *
690      * @param input the {@code String} to escape, may be null.
691      * @return a new escaped {@code String}, {@code null} if null string input.
692      * @see #unescapeXml(String)
693      */
694     public static String escapeXml10(final String input) {
695         return ESCAPE_XML10.translate(input);
696     }
697 
698     /**
699      * Escapes the characters in a {@code String} using XML entities.
700      *
701      * <p>
702      * For example: {@code "bread" & "butter"} =&gt; {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
703      * </p>
704      *
705      * <p>
706      * XML 1.1 can represent certain control characters, but it cannot represent the null byte or unpaired Unicode surrogate code points, even after escaping.
707      * {@code escapeXml11} will remove characters that do not fit in the following ranges:
708      * </p>
709      *
710      * <p>
711      * {@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}
712      * </p>
713      *
714      * <p>
715      * {@code escapeXml11} will escape characters in the following ranges:
716      * </p>
717      *
718      * <p>
719      * {@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}
720      * </p>
721      *
722      * <p>
723      * The returned string can be inserted into a valid XML 1.1 document. Do not use it for XML 1.0 documents.
724      * </p>
725      *
726      * @param input the {@code String} to escape, may be null.
727      * @return a new escaped {@code String}, {@code null} if null string input.
728      * @see #unescapeXml(String)
729      */
730     public static String escapeXml11(final String input) {
731         return ESCAPE_XML11.translate(input);
732     }
733 
734     /**
735      * Escapes the characters in a {@code String} using XSI rules.
736      *
737      * <p>
738      * <strong>Beware!</strong> In most cases you don't want to escape shell commands but use multi-argument methods provided by {@link ProcessBuilder} or
739      * {@link Runtime#exec(String[])} instead.
740      * </p>
741      *
742      * <p>
743      * Example:
744      * </p>
745      *
746      * <pre>
747      * input string: He didn't say, "Stop!"
748      * output string: He\ didn\'t\ say,\ \"Stop!\"
749      * </pre>
750      *
751      * @param input String to escape values in, may be null.
752      * @return String with escaped values, {@code null} if null string input.
753      * @see <a href="https://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a>
754      */
755     public static String escapeXSI(final String input) {
756         return ESCAPE_XSI.translate(input);
757     }
758 
759     /**
760      * Returns a {@code String} value for an unescaped CSV column.
761      *
762      * <p>
763      * If the value is enclosed in double quotes, and contains a comma, newline or double quote, then quotes are removed.
764      * </p>
765      *
766      * <p>
767      * Any double quote escaped characters (a pair of double quotes) are unescaped to just one double quote.
768      * </p>
769      *
770      * <p>
771      * If the value is not enclosed in double quotes, or is and does not contain a comma, newline or double quote, then the String value is returned unchanged.
772      * </p>
773      *
774      * see <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and <a href="https://tools.ietf.org/html/rfc4180">RFC 4180</a>.
775      *
776      * @param input the input CSV column String, may be null.
777      * @return The input String, with enclosing double quotes removed and embedded double quotes unescaped, {@code null} if null string input.
778      */
779     public static String unescapeCsv(final String input) {
780         return UNESCAPE_CSV.translate(input);
781     }
782 
783     /**
784      * Unescapes any EcmaScript literals found in the {@code String}.
785      *
786      * <p>
787      * For example, it will turn a sequence of {@code '\'} and {@code 'n'} into a newline character, unless the {@code '\'} is preceded by another {@code '\'}.
788      * </p>
789      *
790      * @param input the {@code String} to unescape, may be null.
791      * @return A new unescaped {@code String}, {@code null} if null string input.
792      * @see #unescapeJava(String)
793      */
794     public static String unescapeEcmaScript(final String input) {
795         return UNESCAPE_ECMASCRIPT.translate(input);
796     }
797 
798     /**
799      * Unescapes a string containing entity escapes to a string containing the actual Unicode characters corresponding to the escapes. Supports only HTML 3.0
800      * entities.
801      *
802      * @param input the {@code String} to unescape, may be null.
803      * @return a new unescaped {@code String}, {@code null} if null string input.
804      */
805     public static String unescapeHtml3(final String input) {
806         return UNESCAPE_HTML3.translate(input);
807     }
808 
809     /**
810      * Unescapes a string containing entity escapes to a string containing the actual Unicode characters corresponding to the escapes. Supports HTML 4.0
811      * entities.
812      *
813      * <p>
814      * For example, the string {@code "&lt;Fran&ccedil;ais&gt;"} will become {@code "<Fran�ais>"}
815      * </p>
816      *
817      * <p>
818      * If an entity is unrecognized, it is left alone, and inserted verbatim into the result string. e.g. {@code "&gt;&zzzz;x"} will become {@code ">&zzzz;x"}.
819      * </p>
820      *
821      * @param input the {@code String} to unescape, may be null.
822      * @return a new unescaped {@code String}, {@code null} if null string input.
823      */
824     public static String unescapeHtml4(final String input) {
825         return UNESCAPE_HTML4.translate(input);
826     }
827 
828     /**
829      * Unescapes any Java literals found in the {@code String}. For example, it will turn a sequence of {@code '\'} and {@code 'n'} into a newline character,
830      * unless the {@code '\'} is preceded by another {@code '\'}.
831      *
832      * @param input the {@code String} to unescape, may be null.
833      * @return a new unescaped {@code String}, {@code null} if null string input.
834      */
835     public static String unescapeJava(final String input) {
836         return UNESCAPE_JAVA.translate(input);
837     }
838 
839     /**
840      * Unescapes any Json literals found in the {@code String}.
841      *
842      * <p>
843      * For example, it will turn a sequence of {@code '\'} and {@code 'n'} into a newline character, unless the {@code '\'} is preceded by another {@code '\'}.
844      * </p>
845      *
846      * @param input the {@code String} to unescape, may be null.
847      * @return A new unescaped {@code String}, {@code null} if null string input.
848      * @see #unescapeJava(String)
849      */
850     public static String unescapeJson(final String input) {
851         return UNESCAPE_JSON.translate(input);
852     }
853 
854     /**
855      * Unescapes a string containing XML entity escapes to a string containing the actual Unicode characters corresponding to the escapes.
856      *
857      * <p>
858      * Supports only the five basic XML entities (gt, lt, quot, amp, apos). Does not support DTDs or external entities.
859      * </p>
860      *
861      * <p>
862      * Note that numerical \\u Unicode codes are unescaped to their respective Unicode characters. This may change in future releases.
863      * </p>
864      *
865      * @param input the {@code String} to unescape, may be null.
866      * @return a new unescaped {@code String}, {@code null} if null string input.
867      * @see #escapeXml10(String)
868      * @see #escapeXml11(String)
869      */
870     public static String unescapeXml(final String input) {
871         return UNESCAPE_XML.translate(input);
872     }
873 
874     /**
875      * Unescapes the characters in a {@code String} using XSI rules.
876      *
877      * @param input the {@code String} to unescape, may be null.
878      * @return a new unescaped {@code String}, {@code null} if null string input.
879      * @see StringEscapeUtils#escapeXSI(String)
880      */
881     public static String unescapeXSI(final String input) {
882         return UNESCAPE_XSI.translate(input);
883     }
884 
885     /**
886      * {@code StringEscapeUtils} instances should NOT be constructed in standard programming.
887      *
888      * <p>
889      * Instead, the class should be used as:
890      * </p>
891      *
892      * <pre>
893      * StringEscapeUtils.escapeJava("foo");
894      * </pre>
895      *
896      * <p>
897      * This constructor is public to permit tools that require a JavaBean instance to operate.
898      * </p>
899      */
900     public StringEscapeUtils() {
901     }
902 
903 }