View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.lang3;
18  
19  import java.io.IOException;
20  import java.io.Writer;
21  
22  import org.apache.commons.lang3.text.translate.AggregateTranslator;
23  import org.apache.commons.lang3.text.translate.CharSequenceTranslator;
24  import org.apache.commons.lang3.text.translate.EntityArrays;
25  import org.apache.commons.lang3.text.translate.JavaUnicodeEscaper;
26  import org.apache.commons.lang3.text.translate.LookupTranslator;
27  import org.apache.commons.lang3.text.translate.NumericEntityEscaper;
28  import org.apache.commons.lang3.text.translate.NumericEntityUnescaper;
29  import org.apache.commons.lang3.text.translate.OctalUnescaper;
30  import org.apache.commons.lang3.text.translate.UnicodeUnescaper;
31  import org.apache.commons.lang3.text.translate.UnicodeUnpairedSurrogateRemover;
32  
33  /**
34   * Escapes and unescapes {@link String}s for
35   * Java, JavaScript, JSON, HTML, XML and CSV.
36   *
37   * <p>#ThreadSafe#</p>
38   * @since 2.0
39   * @deprecated As of 3.6, use Apache Commons Text
40   * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/StringEscapeUtils.html">
41   * StringEscapeUtils</a> instead
42   */
43  @Deprecated
44  public class StringEscapeUtils {
45  
46      /* ESCAPE TRANSLATORS */
47  
48      // TODO: Create a parent class - 'SinglePassTranslator' ?
49      //       It would handle the index checking + length returning,
50      //       and could also have an optimization check method.
51      static class CsvEscaper extends CharSequenceTranslator {
52  
53          private static final char CSV_DELIMITER = ',';
54          private static final char CSV_QUOTE = '"';
55          private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
56          private static final char[] CSV_SEARCH_CHARS = { CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF };
57  
58          @Override
59          public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
60  
61              if (index != 0) {
62                  throw new IllegalStateException("CsvEscaper should never reach the [1] index");
63              }
64  
65              if (StringUtils.containsNone(input.toString(), CSV_SEARCH_CHARS)) {
66                  out.write(input.toString());
67              } else {
68                  out.write(CSV_QUOTE);
69                  out.write(StringUtils.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR));
70                  out.write(CSV_QUOTE);
71              }
72              return Character.codePointCount(input, 0, input.length());
73          }
74      }
75  
76      static class CsvUnescaper extends CharSequenceTranslator {
77  
78          private static final char CSV_DELIMITER = ',';
79          private static final char CSV_QUOTE = '"';
80          private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
81          private static final char[] CSV_SEARCH_CHARS = {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};
82  
83          @Override
84          public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
85  
86              if (index != 0) {
87                  throw new IllegalStateException("CsvUnescaper should never reach the [1] index");
88              }
89  
90              if ( input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE ) {
91                  out.write(input.toString());
92                  return Character.codePointCount(input, 0, input.length());
93              }
94  
95              // strip quotes
96              final String quoteless = input.subSequence(1, input.length() - 1).toString();
97  
98              if ( StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS) ) {
99                  // deal with escaped quotes; ie) ""
100                 out.write(StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR));
101             } else {
102                 out.write(input.toString());
103             }
104             return Character.codePointCount(input, 0, input.length());
105         }
106     }
107 
108     /**
109      * Translator object for escaping Java.
110      *
111      * While {@link #escapeJava(String)} is the expected method of use, this
112      * object allows the Java escaping functionality to be used
113      * as the foundation for a custom translator.
114      *
115      * @since 3.0
116      */
117     public static final CharSequenceTranslator ESCAPE_JAVA =
118           new LookupTranslator(
119             new String[][] {
120               {"\"", "\\\""},
121               {"\\", "\\\\"},
122           }).with(
123             new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE())
124           ).with(
125             JavaUnicodeEscaper.outsideOf(32, 0x7f)
126         );
127 
128     /**
129      * Translator object for escaping EcmaScript/JavaScript.
130      *
131      * While {@link #escapeEcmaScript(String)} is the expected method of use, this
132      * object allows the EcmaScript escaping functionality to be used
133      * as the foundation for a custom translator.
134      *
135      * @since 3.0
136      */
137     public static final CharSequenceTranslator ESCAPE_ECMASCRIPT =
138         new AggregateTranslator(
139             new LookupTranslator(
140                       new String[][] {
141                             {"'", "\\'"},
142                             {"\"", "\\\""},
143                             {"\\", "\\\\"},
144                             {"/", "\\/"}
145                       }),
146             new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
147             JavaUnicodeEscaper.outsideOf(32, 0x7f)
148         );
149 
150     /**
151      * Translator object for escaping Json.
152      *
153      * While {@link #escapeJson(String)} is the expected method of use, this
154      * object allows the Json escaping functionality to be used
155      * as the foundation for a custom translator.
156      *
157      * @since 3.2
158      */
159     public static final CharSequenceTranslator ESCAPE_JSON =
160         new AggregateTranslator(
161             new LookupTranslator(
162                       new String[][] {
163                             {"\"", "\\\""},
164                             {"\\", "\\\\"},
165                             {"/", "\\/"}
166                       }),
167             new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
168             JavaUnicodeEscaper.outsideOf(32, 0x7f)
169         );
170 
171     /**
172      * Translator object for escaping XML.
173      *
174      * While {@link #escapeXml(String)} is the expected method of use, this
175      * object allows the XML escaping functionality to be used
176      * as the foundation for a custom translator.
177      *
178      * @since 3.0
179      * @deprecated use {@link #ESCAPE_XML10} or {@link #ESCAPE_XML11} instead.
180      */
181     @Deprecated
182     public static final CharSequenceTranslator ESCAPE_XML =
183         new AggregateTranslator(
184             new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
185             new LookupTranslator(EntityArrays.APOS_ESCAPE())
186         );
187 
188     /**
189      * Translator object for escaping XML 1.0.
190      *
191      * While {@link #escapeXml10(String)} is the expected method of use, this
192      * object allows the XML escaping functionality to be used
193      * as the foundation for a custom translator.
194      *
195      * @since 3.3
196      */
197     public static final CharSequenceTranslator ESCAPE_XML10 =
198         new AggregateTranslator(
199             new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
200             new LookupTranslator(EntityArrays.APOS_ESCAPE()),
201             new LookupTranslator(
202                     new String[][] {
203                             { "\u0000", StringUtils.EMPTY },
204                             { "\u0001", StringUtils.EMPTY },
205                             { "\u0002", StringUtils.EMPTY },
206                             { "\u0003", StringUtils.EMPTY },
207                             { "\u0004", StringUtils.EMPTY },
208                             { "\u0005", StringUtils.EMPTY },
209                             { "\u0006", StringUtils.EMPTY },
210                             { "\u0007", StringUtils.EMPTY },
211                             { "\u0008", StringUtils.EMPTY },
212                             { "\u000b", StringUtils.EMPTY },
213                             { "\u000c", StringUtils.EMPTY },
214                             { "\u000e", StringUtils.EMPTY },
215                             { "\u000f", StringUtils.EMPTY },
216                             { "\u0010", StringUtils.EMPTY },
217                             { "\u0011", StringUtils.EMPTY },
218                             { "\u0012", StringUtils.EMPTY },
219                             { "\u0013", StringUtils.EMPTY },
220                             { "\u0014", StringUtils.EMPTY },
221                             { "\u0015", StringUtils.EMPTY },
222                             { "\u0016", StringUtils.EMPTY },
223                             { "\u0017", StringUtils.EMPTY },
224                             { "\u0018", StringUtils.EMPTY },
225                             { "\u0019", StringUtils.EMPTY },
226                             { "\u001a", StringUtils.EMPTY },
227                             { "\u001b", StringUtils.EMPTY },
228                             { "\u001c", StringUtils.EMPTY },
229                             { "\u001d", StringUtils.EMPTY },
230                             { "\u001e", StringUtils.EMPTY },
231                             { "\u001f", StringUtils.EMPTY },
232                             { "\ufffe", StringUtils.EMPTY },
233                             { "\uffff", StringUtils.EMPTY }
234                     }),
235             NumericEntityEscaper.between(0x7f, 0x84),
236             NumericEntityEscaper.between(0x86, 0x9f),
237             new UnicodeUnpairedSurrogateRemover()
238         );
239 
240     /**
241      * Translator object for escaping XML 1.1.
242      *
243      * While {@link #escapeXml11(String)} is the expected method of use, this
244      * object allows the XML escaping functionality to be used
245      * as the foundation for a custom translator.
246      *
247      * @since 3.3
248      */
249     public static final CharSequenceTranslator ESCAPE_XML11 =
250         new AggregateTranslator(
251             new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
252             new LookupTranslator(EntityArrays.APOS_ESCAPE()),
253             new LookupTranslator(
254                     new String[][] {
255                             { "\u0000", StringUtils.EMPTY },
256                             { "\u000b", "&#11;" },
257                             { "\u000c", "&#12;" },
258                             { "\ufffe", StringUtils.EMPTY },
259                             { "\uffff", StringUtils.EMPTY }
260                     }),
261             NumericEntityEscaper.between(0x1, 0x8),
262             NumericEntityEscaper.between(0xe, 0x1f),
263             NumericEntityEscaper.between(0x7f, 0x84),
264             NumericEntityEscaper.between(0x86, 0x9f),
265             new UnicodeUnpairedSurrogateRemover()
266         );
267 
268     /**
269      * Translator object for escaping HTML version 3.0.
270      *
271      * While {@link #escapeHtml3(String)} is the expected method of use, this
272      * object allows the HTML escaping functionality to be used
273      * as the foundation for a custom translator.
274      *
275      * @since 3.0
276      */
277     public static final CharSequenceTranslator ESCAPE_HTML3 =
278         new AggregateTranslator(
279             new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
280             new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE())
281         );
282 
283     /**
284      * Translator object for escaping HTML version 4.0.
285      *
286      * While {@link #escapeHtml4(String)} is the expected method of use, this
287      * object allows the HTML escaping functionality to be used
288      * as the foundation for a custom translator.
289      *
290      * @since 3.0
291      */
292     public static final CharSequenceTranslator ESCAPE_HTML4 =
293         new AggregateTranslator(
294             new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
295             new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()),
296             new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE())
297         );
298 
299     /* CSV ESCAPE TRANSLATOR, FOLLOWED BY THE UNESCAPE TRANSLATORS */
300 
301     /**
302      * Translator object for escaping individual Comma Separated Values.
303      *
304      * While {@link #escapeCsv(String)} is the expected method of use, this
305      * object allows the CSV escaping functionality to be used
306      * as the foundation for a custom translator.
307      *
308      * @since 3.0
309      */
310     public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper();
311 
312     /**
313      * Translator object for unescaping escaped Java.
314      *
315      * While {@link #unescapeJava(String)} is the expected method of use, this
316      * object allows the Java unescaping functionality to be used
317      * as the foundation for a custom translator.
318      *
319      * @since 3.0
320      */
321     // TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)?
322     public static final CharSequenceTranslator UNESCAPE_JAVA =
323         new AggregateTranslator(
324             new OctalUnescaper(),     // .between('\1', '\377'),
325             new UnicodeUnescaper(),
326             new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()),
327             new LookupTranslator(
328                       new String[][] {
329                             {"\\\\", "\\"},
330                             {"\\\"", "\""},
331                             {"\\'", "'"},
332                             {"\\", ""}
333                       })
334         );
335 
336     /**
337      * Translator object for unescaping escaped EcmaScript.
338      *
339      * While {@link #unescapeEcmaScript(String)} is the expected method of use, this
340      * object allows the EcmaScript unescaping functionality to be used
341      * as the foundation for a custom translator.
342      *
343      * @since 3.0
344      */
345     public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA;
346 
347     /**
348      * Translator object for unescaping escaped Json.
349      *
350      * While {@link #unescapeJson(String)} is the expected method of use, this
351      * object allows the Json unescaping functionality to be used
352      * as the foundation for a custom translator.
353      *
354      * @since 3.2
355      */
356     public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA;
357 
358     /**
359      * Translator object for unescaping escaped HTML 3.0.
360      *
361      * While {@link #unescapeHtml3(String)} is the expected method of use, this
362      * object allows the HTML unescaping functionality to be used
363      * as the foundation for a custom translator.
364      *
365      * @since 3.0
366      */
367     public static final CharSequenceTranslator UNESCAPE_HTML3 =
368         new AggregateTranslator(
369             new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
370             new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
371             new NumericEntityUnescaper()
372         );
373 
374     /**
375      * Translator object for unescaping escaped HTML 4.0.
376      *
377      * While {@link #unescapeHtml4(String)} is the expected method of use, this
378      * object allows the HTML unescaping functionality to be used
379      * as the foundation for a custom translator.
380      *
381      * @since 3.0
382      */
383     public static final CharSequenceTranslator UNESCAPE_HTML4 =
384         new AggregateTranslator(
385             new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
386             new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
387             new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()),
388             new NumericEntityUnescaper()
389         );
390 
391     /**
392      * Translator object for unescaping escaped XML.
393      *
394      * While {@link #unescapeXml(String)} is the expected method of use, this
395      * object allows the XML unescaping functionality to be used
396      * as the foundation for a custom translator.
397      *
398      * @since 3.0
399      */
400     public static final CharSequenceTranslator UNESCAPE_XML =
401         new AggregateTranslator(
402             new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
403             new LookupTranslator(EntityArrays.APOS_UNESCAPE()),
404             new NumericEntityUnescaper()
405         );
406 
407     /**
408      * Translator object for unescaping escaped Comma Separated Value entries.
409      *
410      * While {@link #unescapeCsv(String)} is the expected method of use, this
411      * object allows the CSV unescaping functionality to be used
412      * as the foundation for a custom translator.
413      *
414      * @since 3.0
415      */
416     public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper();
417 
418     /* Helper functions */
419 
420     /**
421      * Returns a {@link String} value for a CSV column enclosed in double quotes,
422      * if required.
423      *
424      * <p>If the value contains a comma, newline or double quote, then the
425      *    String value is returned enclosed in double quotes.</p>
426      *
427      * <p>Any double quote characters in the value are escaped with another double quote.</p>
428      *
429      * <p>If the value does not contain a comma, newline or double quote, then the
430      *    String value is returned unchanged.</p>
431      *
432      * see <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
433      * <a href="https://datatracker.ietf.org/doc/html/rfc4180">RFC 4180</a>.
434      *
435      * @param input the input CSV column String, may be null
436      * @return the input String, enclosed in double quotes if the value contains a comma,
437      * newline or double quote, {@code null} if null string input
438      * @since 2.4
439      */
440     public static final String escapeCsv(final String input) {
441         return ESCAPE_CSV.translate(input);
442     }
443 
444     /**
445      * Escapes the characters in a {@link String} using EcmaScript String rules.
446      * <p>Escapes any values it finds into their EcmaScript String form.
447      * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
448      *
449      * <p>So a tab becomes the characters {@code '\\'} and
450      * {@code 't'}.</p>
451      *
452      * <p>The only difference between Java strings and EcmaScript strings
453      * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p>
454      *
455      * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects.</p>
456      *
457      * <p>Example:</p>
458      * <pre>
459      * input string: He didn't say, "Stop!"
460      * output string: He didn\'t say, \"Stop!\"
461      * </pre>
462      *
463      * @param input  String to escape values in, may be null
464      * @return String with escaped values, {@code null} if null string input
465      *
466      * @since 3.0
467      */
468     public static final String escapeEcmaScript(final String input) {
469         return ESCAPE_ECMASCRIPT.translate(input);
470     }
471 
472     /**
473      * Escapes the characters in a {@link String} using HTML entities.
474      * <p>Supports only the HTML 3.0 entities.</p>
475      *
476      * @param input  the {@link String} to escape, may be null
477      * @return a new escaped {@link String}, {@code null} if null string input
478      *
479      * @since 3.0
480      */
481     public static final String escapeHtml3(final String input) {
482         return ESCAPE_HTML3.translate(input);
483     }
484 
485     /**
486      * Escapes the characters in a {@link String} using HTML entities.
487      *
488      * <p>
489      * For example:
490      * </p>
491      * <p>{@code "bread" &amp; "butter"}</p>
492      * becomes:
493      * <p>
494      * {@code &amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;}.
495      * </p>
496      *
497      * <p>Supports all known HTML 4.0 entities, including funky accents.
498      * Note that the commonly used apostrophe escape character (&amp;apos;)
499      * is not a legal entity and so is not supported).</p>
500      *
501      * @param input  the {@link String} to escape, may be null
502      * @return a new escaped {@link String}, {@code null} if null string input
503      *
504      * @see <a href="https://web.archive.org/web/20060225074150/https://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
505      * @see <a href="https://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
506      * @see <a href="https://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
507      * @see <a href="https://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
508      * @see <a href="https://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
509      *
510      * @since 3.0
511      */
512     public static final String escapeHtml4(final String input) {
513         return ESCAPE_HTML4.translate(input);
514     }
515 
516     /**
517      * Escapes the characters in a {@link String} using Java String rules.
518      *
519      * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
520      *
521      * <p>So a tab becomes the characters {@code '\\'} and
522      * {@code 't'}.</p>
523      *
524      * <p>The only difference between Java strings and JavaScript strings
525      * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p>
526      *
527      * <p>Example:</p>
528      * <pre>
529      * input string: He didn't say, "Stop!"
530      * output string: He didn't say, \"Stop!\"
531      * </pre>
532      *
533      * @param input  String to escape values in, may be null
534      * @return String with escaped values, {@code null} if null string input
535      */
536     public static final String escapeJava(final String input) {
537         return ESCAPE_JAVA.translate(input);
538     }
539 
540     /**
541      * Escapes the characters in a {@link String} using Json String rules.
542      * <p>Escapes any values it finds into their Json String form.
543      * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
544      *
545      * <p>So a tab becomes the characters {@code '\\'} and
546      * {@code 't'}.</p>
547      *
548      * <p>The only difference between Java strings and Json strings
549      * is that in Json, forward-slash (/) is escaped.</p>
550      *
551      * <p>See https://www.ietf.org/rfc/rfc4627.txt for further details.</p>
552      *
553      * <p>Example:</p>
554      * <pre>
555      * input string: He didn't say, "Stop!"
556      * output string: He didn't say, \"Stop!\"
557      * </pre>
558      *
559      * @param input  String to escape values in, may be null
560      * @return String with escaped values, {@code null} if null string input
561      *
562      * @since 3.2
563      */
564     public static final String escapeJson(final String input) {
565         return ESCAPE_JSON.translate(input);
566     }
567 
568     /**
569      * Escapes the characters in a {@link String} using XML entities.
570      *
571      * <p>For example: {@code "bread" & "butter"} =&gt;
572      * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
573      * </p>
574      *
575      * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
576      * Does not support DTDs or external entities.</p>
577      *
578      * <p>Note that Unicode characters greater than 0x7f are as of 3.0, no longer
579      *    escaped. If you still wish this functionality, you can achieve it
580      *    via the following:
581      * {@code StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE) );}</p>
582      *
583      * @param input  the {@link String} to escape, may be null
584      * @return a new escaped {@link String}, {@code null} if null string input
585      * @see #unescapeXml(String)
586      * @deprecated use {@link #escapeXml10(java.lang.String)} or {@link #escapeXml11(java.lang.String)} instead.
587      */
588     @Deprecated
589     public static final String escapeXml(final String input) {
590         return ESCAPE_XML.translate(input);
591     }
592 
593     /**
594      * Escapes the characters in a {@link String} using XML entities.
595      *
596      * <p>For example: {@code "bread" & "butter"} =&gt;
597      * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
598      * </p>
599      *
600      * <p>Note that XML 1.0 is a text-only format: it cannot represent control
601      * characters or unpaired Unicode surrogate code points, even after escaping.
602      * {@code escapeXml10} will remove characters that do not fit in the
603      * following ranges:</p>
604      *
605      * <p>{@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
606      *
607      * <p>Though not strictly necessary, {@code escapeXml10} will escape
608      * characters in the following ranges:</p>
609      *
610      * <p>{@code [#x7F-#x84] | [#x86-#x9F]}</p>
611      *
612      * <p>The returned string can be inserted into a valid XML 1.0 or XML 1.1
613      * document. If you want to allow more non-text characters in an XML 1.1
614      * document, use {@link #escapeXml11(String)}.</p>
615      *
616      * @param input  the {@link String} to escape, may be null
617      * @return a new escaped {@link String}, {@code null} if null string input
618      * @see #unescapeXml(String)
619      * @since 3.3
620      */
621     public static String escapeXml10(final String input) {
622         return ESCAPE_XML10.translate(input);
623     }
624 
625     /**
626      * Escapes the characters in a {@link String} using XML entities.
627      *
628      * <p>For example: {@code "bread" & "butter"} =&gt;
629      * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
630      * </p>
631      *
632      * <p>XML 1.1 can represent certain control characters, but it cannot represent
633      * the null byte or unpaired Unicode surrogate code points, even after escaping.
634      * {@code escapeXml11} will remove characters that do not fit in the following
635      * ranges:</p>
636      *
637      * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
638      *
639      * <p>{@code escapeXml11} will escape characters in the following ranges:</p>
640      *
641      * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p>
642      *
643      * <p>The returned string can be inserted into a valid XML 1.1 document. Do not
644      * use it for XML 1.0 documents.</p>
645      *
646      * @param input  the {@link String} to escape, may be null
647      * @return a new escaped {@link String}, {@code null} if null string input
648      * @see #unescapeXml(String)
649      * @since 3.3
650      */
651     public static String escapeXml11(final String input) {
652         return ESCAPE_XML11.translate(input);
653     }
654 
655     /**
656      * Returns a {@link String} value for an unescaped CSV column.
657      *
658      * <p>If the value is enclosed in double quotes, and contains a comma, newline
659      *    or double quote, then quotes are removed.
660      * </p>
661      *
662      * <p>Any double quote escaped characters (a pair of double quotes) are unescaped
663      *    to just one double quote.</p>
664      *
665      * <p>If the value is not enclosed in double quotes, or is and does not contain a
666      *    comma, newline or double quote, then the String value is returned unchanged.</p>
667      *
668      * see <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
669      * <a href="https://datatracker.ietf.org/doc/html/rfc4180">RFC 4180</a>.
670      *
671      * @param input the input CSV column String, may be null
672      * @return the input String, with enclosing double quotes removed and embedded double
673      * quotes unescaped, {@code null} if null string input
674      * @since 2.4
675      */
676     public static final String unescapeCsv(final String input) {
677         return UNESCAPE_CSV.translate(input);
678     }
679 
680     /**
681      * Unescapes any EcmaScript literals found in the {@link String}.
682      *
683      * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
684      * into a newline character, unless the {@code '\'} is preceded by another
685      * {@code '\'}.</p>
686      *
687      * @see #unescapeJava(String)
688      * @param input  the {@link String} to unescape, may be null
689      * @return A new unescaped {@link String}, {@code null} if null string input
690      *
691      * @since 3.0
692      */
693     public static final String unescapeEcmaScript(final String input) {
694         return UNESCAPE_ECMASCRIPT.translate(input);
695     }
696 
697     /**
698      * Unescapes a string containing entity escapes to a string
699      * containing the actual Unicode characters corresponding to the
700      * escapes. Supports only HTML 3.0 entities.
701      *
702      * @param input  the {@link String} to unescape, may be null
703      * @return a new unescaped {@link String}, {@code null} if null string input
704      *
705      * @since 3.0
706      */
707     public static final String unescapeHtml3(final String input) {
708         return UNESCAPE_HTML3.translate(input);
709     }
710 
711     /**
712      * Unescapes a string containing entity escapes to a string
713      * containing the actual Unicode characters corresponding to the
714      * escapes. Supports HTML 4.0 entities.
715      *
716      * <p>For example, the string {@code "&lt;Fran&ccedil;ais&gt;"}
717      * will become {@code "<Français>"}</p>
718      *
719      * <p>If an entity is unrecognized, it is left alone, and inserted
720      * verbatim into the result string. e.g. {@code "&gt;&zzzz;x"} will
721      * become {@code ">&zzzz;x"}.</p>
722      *
723      * @param input  the {@link String} to unescape, may be null
724      * @return a new unescaped {@link String}, {@code null} if null string input
725      *
726      * @since 3.0
727      */
728     public static final String unescapeHtml4(final String input) {
729         return UNESCAPE_HTML4.translate(input);
730     }
731 
732     /**
733      * Unescapes any Java literals found in the {@link String}.
734      * For example, it will turn a sequence of {@code '\'} and
735      * {@code 'n'} into a newline character, unless the {@code '\'}
736      * is preceded by another {@code '\'}.
737      *
738      * @param input  the {@link String} to unescape, may be null
739      * @return a new unescaped {@link String}, {@code null} if null string input
740      */
741     public static final String unescapeJava(final String input) {
742         return UNESCAPE_JAVA.translate(input);
743     }
744 
745     /**
746      * Unescapes any Json literals found in the {@link String}.
747      *
748      * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
749      * into a newline character, unless the {@code '\'} is preceded by another
750      * {@code '\'}.</p>
751      *
752      * @see #unescapeJava(String)
753      * @param input  the {@link String} to unescape, may be null
754      * @return A new unescaped {@link String}, {@code null} if null string input
755      *
756      * @since 3.2
757      */
758     public static final String unescapeJson(final String input) {
759         return UNESCAPE_JSON.translate(input);
760     }
761 
762 
763     /**
764      * Unescapes a string containing XML entity escapes to a string
765      * containing the actual Unicode characters corresponding to the
766      * escapes.
767      *
768      * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
769      * Does not support DTDs or external entities.</p>
770      *
771      * <p>Note that numerical \\u Unicode codes are unescaped to their respective
772      *    Unicode characters. This may change in future releases.</p>
773      *
774      * @param input  the {@link String} to unescape, may be null
775      * @return a new unescaped {@link String}, {@code null} if null string input
776      * @see #escapeXml(String)
777      * @see #escapeXml10(String)
778      * @see #escapeXml11(String)
779      */
780     public static final String unescapeXml(final String input) {
781         return UNESCAPE_XML.translate(input);
782     }
783 
784     /**
785      * {@link StringEscapeUtils} instances should NOT be constructed in
786      * standard programming.
787      *
788      * <p>Instead, the class should be used as:</p>
789      * <pre>StringEscapeUtils.escapeJava("foo");</pre>
790      *
791      * <p>This constructor is public to permit tools that require a JavaBean
792      * instance to operate.</p>
793      */
794     public StringEscapeUtils() {
795     }
796 
797 }