View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.text;
18  
19  import org.apache.commons.lang3.CharUtils;
20  import org.apache.commons.lang3.StringUtils;
21  import org.apache.commons.text.translate.AggregateTranslator;
22  import org.apache.commons.text.translate.CharSequenceTranslator;
23  import org.apache.commons.text.translate.EntityArrays;
24  import org.apache.commons.text.translate.JavaUnicodeEscaper;
25  import org.apache.commons.text.translate.LookupTranslator;
26  import org.apache.commons.text.translate.NumericEntityEscaper;
27  import org.apache.commons.text.translate.NumericEntityUnescaper;
28  import org.apache.commons.text.translate.OctalUnescaper;
29  import org.apache.commons.text.translate.SingleLookupTranslator;
30  import org.apache.commons.text.translate.UnicodeUnescaper;
31  import org.apache.commons.text.translate.UnicodeUnpairedSurrogateRemover;
32  
33  import java.io.IOException;
34  import java.io.Writer;
35  
36  /**
37   * <p>Escapes and unescapes {@code String}s for
38   * Java, Java Script, HTML and XML.</p>
39   *
40   * <p>#ThreadSafe#</p>
41   *
42   *
43   * <p>
44   * This code has been adapted from Apache Commons Lang 3.5.
45   * </p>
46   *
47   * @since 1.0
48   */
49  public class StringEscapeUtils {
50  
51      /* ESCAPE TRANSLATORS */
52  
53      /**
54       * Translator object for escaping Java. 
55       *
56       * While {@link #escapeJava(String)} is the expected method of use, this 
57       * object allows the Java escaping functionality to be used 
58       * as the foundation for a custom translator.
59       */
60      public static final CharSequenceTranslator ESCAPE_JAVA =
61              new LookupTranslator(
62                      new String[][] {
63                              {"\"", "\\\""},
64                              {"\\", "\\\\"},
65                      }).with(
66                      new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE())
67              ).with(
68                      JavaUnicodeEscaper.outsideOf(32, 0x7f)
69              );
70  
71      /**
72       * Translator object for escaping EcmaScript/JavaScript. 
73       *
74       * While {@link #escapeEcmaScript(String)} is the expected method of use, this 
75       * object allows the EcmaScript escaping functionality to be used 
76       * as the foundation for a custom translator.
77       */
78      public static final CharSequenceTranslator ESCAPE_ECMASCRIPT =
79              new AggregateTranslator(
80                      new LookupTranslator(
81                              new String[][] {
82                                      {"'", "\\'"},
83                                      {"\"", "\\\""},
84                                      {"\\", "\\\\"},
85                                      {"/", "\\/"}
86                              }),
87                      new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
88                      JavaUnicodeEscaper.outsideOf(32, 0x7f)
89              );
90  
91      /**
92       * Translator object for escaping Json.
93       *
94       * While {@link #escapeJson(String)} is the expected method of use, this
95       * object allows the Json escaping functionality to be used
96       * as the foundation for a custom translator.
97       */
98      public static final CharSequenceTranslator ESCAPE_JSON =
99              new AggregateTranslator(
100                     new LookupTranslator(
101                             new String[][] {
102                                     {"\"", "\\\""},
103                                     {"\\", "\\\\"},
104                                     {"/", "\\/"}
105                             }),
106                     new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
107                     JavaUnicodeEscaper.outsideOf(32, 0x7f)
108             );
109 
110     /**
111      * Translator object for escaping XML 1.0.
112      *
113      * While {@link #escapeXml10(String)} is the expected method of use, this
114      * object allows the XML escaping functionality to be used
115      * as the foundation for a custom translator.
116      */
117     public static final CharSequenceTranslator ESCAPE_XML10 =
118             new AggregateTranslator(
119                     new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
120                     new LookupTranslator(EntityArrays.APOS_ESCAPE()),
121                     new LookupTranslator(
122                             new String[][] {
123                                     { "\u0000", StringUtils.EMPTY },
124                                     { "\u0001", StringUtils.EMPTY },
125                                     { "\u0002", StringUtils.EMPTY },
126                                     { "\u0003", StringUtils.EMPTY },
127                                     { "\u0004", StringUtils.EMPTY },
128                                     { "\u0005", StringUtils.EMPTY },
129                                     { "\u0006", StringUtils.EMPTY },
130                                     { "\u0007", StringUtils.EMPTY },
131                                     { "\u0008", StringUtils.EMPTY },
132                                     { "\u000b", StringUtils.EMPTY },
133                                     { "\u000c", StringUtils.EMPTY },
134                                     { "\u000e", StringUtils.EMPTY },
135                                     { "\u000f", StringUtils.EMPTY },
136                                     { "\u0010", StringUtils.EMPTY },
137                                     { "\u0011", StringUtils.EMPTY },
138                                     { "\u0012", StringUtils.EMPTY },
139                                     { "\u0013", StringUtils.EMPTY },
140                                     { "\u0014", StringUtils.EMPTY },
141                                     { "\u0015", StringUtils.EMPTY },
142                                     { "\u0016", StringUtils.EMPTY },
143                                     { "\u0017", StringUtils.EMPTY },
144                                     { "\u0018", StringUtils.EMPTY },
145                                     { "\u0019", StringUtils.EMPTY },
146                                     { "\u001a", StringUtils.EMPTY },
147                                     { "\u001b", StringUtils.EMPTY },
148                                     { "\u001c", StringUtils.EMPTY },
149                                     { "\u001d", StringUtils.EMPTY },
150                                     { "\u001e", StringUtils.EMPTY },
151                                     { "\u001f", StringUtils.EMPTY },
152                                     { "\ufffe", StringUtils.EMPTY },
153                                     { "\uffff", StringUtils.EMPTY }
154                             }),
155                     NumericEntityEscaper.between(0x7f, 0x84),
156                     NumericEntityEscaper.between(0x86, 0x9f),
157                     new UnicodeUnpairedSurrogateRemover()
158             );
159 
160     /**
161      * Translator object for escaping XML 1.1.
162      *
163      * While {@link #escapeXml11(String)} is the expected method of use, this
164      * object allows the XML escaping functionality to be used
165      * as the foundation for a custom translator.
166      */
167     public static final CharSequenceTranslator ESCAPE_XML11 =
168             new AggregateTranslator(
169                     new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
170                     new LookupTranslator(EntityArrays.APOS_ESCAPE()),
171                     new LookupTranslator(
172                             new String[][] {
173                                     { "\u0000", StringUtils.EMPTY },
174                                     { "\u000b", "&#11;" },
175                                     { "\u000c", "&#12;" },
176                                     { "\ufffe", StringUtils.EMPTY },
177                                     { "\uffff", StringUtils.EMPTY }
178                             }),
179                     NumericEntityEscaper.between(0x1, 0x8),
180                     NumericEntityEscaper.between(0xe, 0x1f),
181                     NumericEntityEscaper.between(0x7f, 0x84),
182                     NumericEntityEscaper.between(0x86, 0x9f),
183                     new UnicodeUnpairedSurrogateRemover()
184             );
185 
186     /**
187      * Translator object for escaping HTML version 3.0.
188      *
189      * While {@link #escapeHtml3(String)} is the expected method of use, this 
190      * object allows the HTML escaping functionality to be used 
191      * as the foundation for a custom translator.
192      */
193     public static final CharSequenceTranslator ESCAPE_HTML3 =
194             new AggregateTranslator(
195                     new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
196                     new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE())
197             );
198 
199     /**
200      * The improved translator object for escaping HTML version 3.0.
201      * The 'improved' part of this translator is that it checks if the html is already translated.
202      * This check prevents double, triple, or recursive translations.
203      *
204      * While {@link #escapeHtml3Once(String)} is the expected method of use, this
205      * object allows the HTML escaping functionality to be used
206      * as the foundation for a custom translator.
207      *
208      * Note that, multiple lookup tables should be passed to this translator
209      * instead of passing multiple instances of this translator to the
210      * AggregateTranslator. Because, a SingleLookupTranslator only checks the values of the
211      * lookup table passed to that instance while deciding whether a value is
212      * already translated or not.
213      */
214     public static final CharSequenceTranslator ESCAPE_HTML3_ONCE =
215             new SingleLookupTranslator(EntityArrays.BASIC_ESCAPE(), EntityArrays.ISO8859_1_ESCAPE());
216 
217 
218     /**
219      * Translator object for escaping HTML version 4.0.
220      *
221      * While {@link #escapeHtml4(String)} is the expected method of use, this 
222      * object allows the HTML escaping functionality to be used 
223      * as the foundation for a custom translator.
224      */
225     public static final CharSequenceTranslator ESCAPE_HTML4 =
226             new AggregateTranslator(
227                     new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
228                     new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()),
229                     new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE())
230             );
231 
232     /**
233      * The improved translator object for escaping HTML version 4.0.
234      * The 'improved' part of this translator is that it checks if the html is already translated.
235      * This check prevents double, triple, or recursive translations.
236      *
237      * While {@link #escapeHtml4Once(String)} is the expected method of use, this
238      * object allows the HTML escaping functionality to be used
239      * as the foundation for a custom translator.
240      *
241      * Note that, multiple lookup tables should be passed to this translator
242      * instead of passing multiple instances of this translator to the
243      * AggregateTranslator. Because, a SingleLookupTranslator only checks the values of the
244      * lookup table passed to that instance while deciding whether a value is
245      * already translated or not.
246      */
247     public static final CharSequenceTranslator ESCAPE_HTML4_ONCE =
248             new SingleLookupTranslator(EntityArrays.BASIC_ESCAPE(), EntityArrays.ISO8859_1_ESCAPE(), EntityArrays.HTML40_EXTENDED_ESCAPE());
249 
250     /**
251      * Translator object for escaping individual Comma Separated Values. 
252      *
253      * While {@link #escapeCsv(String)} is the expected method of use, this 
254      * object allows the CSV escaping functionality to be used 
255      * as the foundation for a custom translator.
256      */
257     public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper();
258 
259     // TODO: Create a parent class - 'SinglePassTranslator' ?
260     //       It would handle the index checking + length returning, 
261     //       and could also have an optimization check method.
262     static class CsvEscaper extends CharSequenceTranslator {
263 
264         private static final char CSV_DELIMITER = ',';
265         private static final char CSV_QUOTE = '"';
266         private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
267         private static final char[] CSV_SEARCH_CHARS =
268                 new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};
269 
270         @Override
271         public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
272 
273             if(index != 0) {
274                 throw new IllegalStateException("CsvEscaper should never reach the [1] index");
275             }
276 
277             if (StringUtils.containsNone(input.toString(), CSV_SEARCH_CHARS)) {
278                 out.write(input.toString());
279             } else {
280                 out.write(CSV_QUOTE);
281                 out.write(StringUtils.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR));
282                 out.write(CSV_QUOTE);
283             }
284             return Character.codePointCount(input, 0, input.length());
285         }
286     }
287 
288     /**
289      * Translator object for escaping Shell command language.
290      *
291      * @see <a href="http://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a>
292      */
293     public static final CharSequenceTranslator ESCAPE_XSI =
294         new LookupTranslator(
295             new String[][] {
296                     {"|", "\\|"},
297                     {"&", "\\&"},
298                     {";", "\\;"},
299                     {"<", "\\<"},
300                     {">", "\\>"},
301                     {"(", "\\("},
302                     {")", "\\)"},
303                     {"$", "\\$"},
304                     {"`", "\\`"},
305                     {"\\", "\\\\"},
306                     {"\"", "\\\""},
307                     {"'", "\\'"},
308                     {" ", "\\ "},
309                     {"\t", "\\\t"},
310                     {"\r\n", ""},
311                     {"\n", ""},
312                     {"*", "\\*"},
313                     {"?", "\\?"},
314                     {"[", "\\["},
315                     {"#", "\\#"},
316                     {"~", "\\~"},
317                     {"=", "\\="},
318                     {"%", "\\%"},
319             });
320 
321     /* UNESCAPE TRANSLATORS */
322 
323     /**
324      * Translator object for unescaping escaped Java. 
325      *
326      * While {@link #unescapeJava(String)} is the expected method of use, this 
327      * object allows the Java unescaping functionality to be used 
328      * as the foundation for a custom translator.
329      */
330     // TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)?
331     public static final CharSequenceTranslator UNESCAPE_JAVA =
332             new AggregateTranslator(
333                     new OctalUnescaper(),     // .between('\1', '\377'),
334                     new UnicodeUnescaper(),
335                     new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()),
336                     new LookupTranslator(
337                             new String[][] {
338                                     {"\\\\", "\\"},
339                                     {"\\\"", "\""},
340                                     {"\\'", "'"},
341                                     {"\\", ""}
342                             })
343             );
344 
345     /**
346      * Translator object for unescaping escaped EcmaScript. 
347      *
348      * While {@link #unescapeEcmaScript(String)} is the expected method of use, this 
349      * object allows the EcmaScript unescaping functionality to be used 
350      * as the foundation for a custom translator.
351      */
352     public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA;
353 
354     /**
355      * Translator object for unescaping escaped Json.
356      *
357      * While {@link #unescapeJson(String)} is the expected method of use, this
358      * object allows the Json unescaping functionality to be used
359      * as the foundation for a custom translator.
360      */
361     public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA;
362 
363     /**
364      * Translator object for unescaping escaped HTML 3.0. 
365      *
366      * While {@link #unescapeHtml3(String)} is the expected method of use, this 
367      * object allows the HTML unescaping functionality to be used 
368      * as the foundation for a custom translator.
369      */
370     public static final CharSequenceTranslator UNESCAPE_HTML3 =
371             new AggregateTranslator(
372                     new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
373                     new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
374                     new NumericEntityUnescaper()
375             );
376 
377     /**
378      * Translator object for unescaping escaped HTML 4.0. 
379      *
380      * While {@link #unescapeHtml4(String)} is the expected method of use, this 
381      * object allows the HTML unescaping functionality to be used 
382      * as the foundation for a custom translator.
383      */
384     public static final CharSequenceTranslator UNESCAPE_HTML4 =
385             new AggregateTranslator(
386                     new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
387                     new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
388                     new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()),
389                     new NumericEntityUnescaper()
390             );
391 
392     /**
393      * Translator object for unescaping escaped XML.
394      *
395      * While {@link #unescapeXml(String)} is the expected method of use, this 
396      * object allows the XML unescaping functionality to be used 
397      * as the foundation for a custom translator.
398      */
399     public static final CharSequenceTranslator UNESCAPE_XML =
400             new AggregateTranslator(
401                     new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
402                     new LookupTranslator(EntityArrays.APOS_UNESCAPE()),
403                     new NumericEntityUnescaper()
404             );
405 
406     /**
407      * Translator object for unescaping escaped Comma Separated Value entries.
408      *
409      * While {@link #unescapeCsv(String)} is the expected method of use, this 
410      * object allows the CSV unescaping functionality to be used 
411      * as the foundation for a custom translator.
412      */
413     public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper();
414 
415     static class CsvUnescaper extends CharSequenceTranslator {
416 
417         private static final char CSV_DELIMITER = ',';
418         private static final char CSV_QUOTE = '"';
419         private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
420         private static final char[] CSV_SEARCH_CHARS =
421                 new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};
422 
423         @Override
424         public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
425 
426             if(index != 0) {
427                 throw new IllegalStateException("CsvUnescaper should never reach the [1] index");
428             }
429 
430             if ( input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE ) {
431                 out.write(input.toString());
432                 return Character.codePointCount(input, 0, input.length());
433             }
434 
435             // strip quotes
436             final String quoteless = input.subSequence(1, input.length() - 1).toString();
437 
438             if ( StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS) ) {
439                 // deal with escaped quotes; ie) ""
440                 out.write(StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR));
441             } else {
442                 out.write(input.toString());
443             }
444             return Character.codePointCount(input, 0, input.length());
445         }
446     }
447 
448     public static final CharSequenceTranslator UNESCAPE_XSI = new XsiUnescaper();
449 
450     /**
451      * Translator object for unescaping backslash escaped entries.
452      */
453     static class XsiUnescaper extends CharSequenceTranslator {
454 
455         private static final char BACKSLASH = '\\';
456 
457         @Override
458         public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
459 
460             if(index != 0) {
461                 throw new IllegalStateException("XsiUnescaper should never reach the [1] index");
462             }
463 
464             String s = input.toString();
465 
466             int segmentStart = 0;
467             int searchOffset = 0;
468             while (true) {
469                 int pos = s.indexOf(BACKSLASH, searchOffset);
470                 if (pos == -1) {
471                     if (segmentStart < s.length()) {
472                         out.write(s.substring(segmentStart));
473                     }
474                     break;
475                 }
476                 if (pos > segmentStart) {
477                     out.write(s.substring(segmentStart, pos));
478                 }
479                 segmentStart = pos + 1;
480                 searchOffset = pos + 2;
481             }
482 
483             return Character.codePointCount(input, 0, input.length());
484         }
485     }
486 
487     /* Helper functions */
488 
489     /**
490      * <p>{@code StringEscapeUtils} instances should NOT be constructed in
491      * standard programming.</p>
492      *
493      * <p>Instead, the class should be used as:</p>
494      * <pre>StringEscapeUtils.escapeJava("foo");</pre>
495      *
496      * <p>This constructor is public to permit tools that require a JavaBean
497      * instance to operate.</p>
498      */
499     public StringEscapeUtils() {
500         super();
501     }
502 
503     /**
504      * <p>Convenience wrapper for {@link java.lang.StringBuilder} providing escape methods.</p>
505      *
506      * <p>Example:</p>
507      * <pre>
508      * new Builder(ESCAPE_HTML4)
509      *      .append("&lt;p&gt;")
510      *      .escape("This is paragraph 1 and special chars like &amp; get escaped.")
511      *      .append("&lt;/p&gt;&lt;p&gt;")
512      *      .escape("This is paragraph 2 &amp; more...")
513      *      .append("&lt;/p&gt;")
514      *      .toString()
515      * </pre>
516      *
517      */
518     public static class Builder {
519 
520         private final StringBuilder sb;
521         private final CharSequenceTranslator translator;
522 
523         private Builder(final CharSequenceTranslator translator) {
524             this.sb = new StringBuilder();
525             this.translator = translator;
526         }
527 
528         /**
529          * <p>Escape {@code input} according to the given {@link CharSequenceTranslator}.</p>
530          *
531          * @param input the String to escape
532          * @return {@code this}, to enable chaining
533          */
534         public Builder escape(final String input) {
535             sb.append(translator.translate(input));
536             return this;
537         }
538 
539         /**
540          * Literal append, no escaping being done.
541          *
542          * @param input the String to append
543          * @return {@code this}, to enable chaining
544          */
545         public Builder append(final String input) {
546             sb.append(input);
547             return this;
548         }
549 
550         /**
551          * <p>Return the escaped string.</p>
552          *
553          * @return the escaped string
554          */
555         @Override
556         public String toString() {
557             return sb.toString();
558         }
559     }
560 
561     /**
562      * Get a {@link Builder}.
563      * @param translator the text translator
564      * @return {@link Builder}
565      */
566     public static StringEscapeUtils.Builder builder(final CharSequenceTranslator translator) {
567         return new Builder(translator);
568     }
569 
570     // Java and JavaScript
571     //--------------------------------------------------------------------------
572     /**
573      * <p>Escapes the characters in a {@code String} using Java String rules.</p>
574      *
575      * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
576      *
577      * <p>So a tab becomes the characters {@code '\\'} and
578      * {@code 't'}.</p>
579      *
580      * <p>The only difference between Java strings and JavaScript strings
581      * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p>
582      *
583      * <p>Example:</p>
584      * <pre>
585      * input string: He didn't say, "Stop!"
586      * output string: He didn't say, \"Stop!\"
587      * </pre>
588      *
589      * @param input  String to escape values in, may be null
590      * @return String with escaped values, {@code null} if null string input
591      */
592     public static final String escapeJava(final String input) {
593         return ESCAPE_JAVA.translate(input);
594     }
595 
596     /**
597      * <p>Escapes the characters in a {@code String} using EcmaScript String rules.</p>
598      * <p>Escapes any values it finds into their EcmaScript String form.
599      * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
600      *
601      * <p>So a tab becomes the characters {@code '\\'} and
602      * {@code 't'}.</p>
603      *
604      * <p>The only difference between Java strings and EcmaScript strings
605      * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p>
606      *
607      * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects. </p>
608      *
609      * <p>Example:</p>
610      * <pre>
611      * input string: He didn't say, "Stop!"
612      * output string: He didn\'t say, \"Stop!\"
613      * </pre>
614      *
615      * <b>Security Note.</b> We only provide backslash escaping in this method. For example, {@code '\"'} has the output
616      * {@code '\\\"'} which could result in potential issues in the case where the string being escaped is being used
617      * in an HTML tag like {@code <select onmouseover="..." />}. If you wish to have more rigorous string escaping, you
618      * may consider the
619      * <a href="https://www.owasp.org/index.php/Category:OWASP_Enterprise_Security_API_JAVA">ESAPI Libraries</a>. Further,
620      * you can view the
621      * <a href="https://github.com/esapi">ESAPI GitHub Org</a>.
622      *
623      * @param input  String to escape values in, may be null
624      * @return String with escaped values, {@code null} if null string input
625      */
626     public static final String escapeEcmaScript(final String input) {
627         return ESCAPE_ECMASCRIPT.translate(input);
628     }
629 
630     /**
631      * <p>Escapes the characters in a {@code String} using Json String rules.</p>
632      * <p>Escapes any values it finds into their Json String form.
633      * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
634      *
635      * <p>So a tab becomes the characters {@code '\\'} and
636      * {@code 't'}.</p>
637      *
638      * <p>The only difference between Java strings and Json strings
639      * is that in Json, forward-slash (/) is escaped.</p>
640      *
641      * <p>See http://www.ietf.org/rfc/rfc4627.txt for further details. </p>
642      *
643      * <p>Example:</p>
644      * <pre>
645      * input string: He didn't say, "Stop!"
646      * output string: He didn't say, \"Stop!\"
647      * </pre>
648      *
649      * @param input  String to escape values in, may be null
650      * @return String with escaped values, {@code null} if null string input
651      */
652     public static final String escapeJson(final String input) {
653         return ESCAPE_JSON.translate(input);
654     }
655 
656     /**
657      * <p>Unescapes any Java literals found in the {@code String}.
658      * For example, it will turn a sequence of {@code '\'} and
659      * {@code 'n'} into a newline character, unless the {@code '\'}
660      * is preceded by another {@code '\'}.</p>
661      *
662      * @param input  the {@code String} to unescape, may be null
663      * @return a new unescaped {@code String}, {@code null} if null string input
664      */
665     public static final String unescapeJava(final String input) {
666         return UNESCAPE_JAVA.translate(input);
667     }
668 
669     /**
670      * <p>Unescapes any EcmaScript literals found in the {@code String}.</p>
671      *
672      * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
673      * into a newline character, unless the {@code '\'} is preceded by another
674      * {@code '\'}.</p>
675      *
676      * @see #unescapeJava(String)
677      * @param input  the {@code String} to unescape, may be null
678      * @return A new unescaped {@code String}, {@code null} if null string input
679      */
680     public static final String unescapeEcmaScript(final String input) {
681         return UNESCAPE_ECMASCRIPT.translate(input);
682     }
683 
684     /**
685      * <p>Unescapes any Json literals found in the {@code String}.</p>
686      *
687      * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
688      * into a newline character, unless the {@code '\'} is preceded by another
689      * {@code '\'}.</p>
690      *
691      * @see #unescapeJava(String)
692      * @param input  the {@code String} to unescape, may be null
693      * @return A new unescaped {@code String}, {@code null} if null string input
694      */
695     public static final String unescapeJson(final String input) {
696         return UNESCAPE_JSON.translate(input);
697     }
698 
699     // HTML and XML
700     //--------------------------------------------------------------------------
701     /**
702      * <p>Escapes the characters in a {@code String} using HTML entities.</p>
703      *
704      * <p>
705      * For example:
706      * </p>
707      * <p><code>"bread" &amp; "butter"</code></p>
708      * <p>becomes:</p>
709      * <p>
710      * <code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code>.
711      * </p>
712      *
713      * <p>Supports all known HTML 4.0 entities, including accented characters.
714      * Note that the commonly used apostrophe escape character (&amp;apos;)
715      * is not a legal entity and so is not supported.</p>
716      *
717      * @param input  the {@code String} to escape, may be null
718      * @return a new escaped {@code String}, {@code null} if null string input
719      *
720      * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
721      * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
722      * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
723      * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
724      * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
725      */
726     public static final String escapeHtml4(final String input) {
727         return ESCAPE_HTML4.translate(input);
728     }
729 
730     /**
731      * <p>Escapes the characters in a {@code String} using HTML entities,
732      * but only once: characters that are already escaped are not escaped again.</p>
733      *
734      * <p>
735      * For example:
736      * </p>
737      * <p><code>"bread" &amp; "butter"</code></p>
738      * <p>becomes:</p>
739      * <p>
740      * <code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code>.
741      * </p>
742      *
743      * <p>
744      * But:
745      * </p>
746      * <p><code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code></p>
747      * <p>remains unaffected.</p>
748      *
749      * <p>Supports all known HTML 4.0 entities, including accented characters.
750      * Note that the commonly used apostrophe escape character (&amp;apos;)
751      * is not a legal entity and so is not supported.</p>
752      *
753      * @param input  the {@code String} to escape, may be null
754      * @return a new escaped {@code String}, {@code null} if null string input
755      *
756      * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
757      * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
758      * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
759      * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
760      * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
761      */
762     public static final String escapeHtml4Once(final String input) {
763         return ESCAPE_HTML4_ONCE.translate(input);
764     }
765 
766 
767     /**
768      * <p>Escapes the characters in a {@code String} using HTML entities.</p>
769      * <p>Supports only the HTML 3.0 entities.</p>
770      *
771      * @param input  the {@code String} to escape, may be null
772      * @return a new escaped {@code String}, {@code null} if null string input
773      */
774     public static final String escapeHtml3(final String input) {
775         return ESCAPE_HTML3.translate(input);
776     }
777 
778     /**
779      * <p>Escapes the characters in a {@code String} using HTML entities,
780      * but only once: characters that are already escaped are not escaped again.</p>
781      * <p>Supports only the HTML 3.0 entities.</p>
782      *
783      * @param input  the {@code String} to escape, may be null
784      * @return a new escaped {@code String}, {@code null} if null string input
785      */
786     public static final String escapeHtml3Once(final String input) {
787         return ESCAPE_HTML3_ONCE.translate(input);
788     }
789 
790     //-----------------------------------------------------------------------
791     /**
792      * <p>Unescapes a string containing entity escapes to a string
793      * containing the actual Unicode characters corresponding to the
794      * escapes. Supports HTML 4.0 entities.</p>
795      *
796      * <p>For example, the string {@code "&lt;Fran&ccedil;ais&gt;"}
797      * will become {@code "<Français>"}.</p>
798      *
799      * <p>If an entity is unrecognized, it is left alone, and inserted
800      * verbatim into the result string, e.g. {@code "&gt;&zzzz;x"} will
801      * become {@code ">&zzzz;x"}.</p>
802      *
803      * @param input  the {@code String} to unescape, may be null
804      * @return a new unescaped {@code String}, {@code null} if null string input
805      */
806     public static final String unescapeHtml4(final String input) {
807         return UNESCAPE_HTML4.translate(input);
808     }
809 
810     /**
811      * <p>Unescapes a string containing entity escapes to a string
812      * containing the actual Unicode characters corresponding to the
813      * escapes. Supports only HTML 3.0 entities; see also {@link #escapeHtml3(String)}.</p>
814      *
815      * @param input  the {@code String} to unescape, may be null
816      * @return a new unescaped {@code String}, {@code null} if null string input
817      */
818     public static final String unescapeHtml3(final String input) {
819         return UNESCAPE_HTML3.translate(input);
820     }
821 
822     /**
823      * <p>Escapes the characters in a {@code String} using XML entities.</p>
824      *
825      * <p>For example: {@code "bread" & "butter"} becomes
826      * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
827      * </p>
828      *
829      * <p>Note that XML 1.0 is a text-only format: it cannot represent control
830      * characters or unpaired Unicode surrogate codepoints, even after escaping.
831      * {@code escapeXml10} will remove characters that do not fit in the
832      * following ranges:</p>
833      *
834      * <p>{@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
835      *
836      * <p>Though not strictly necessary, {@code escapeXml10} will escape
837      * characters in the following ranges:</p>
838      *
839      * <p>{@code [#x7F-#x84] | [#x86-#x9F]}</p>
840      *
841      * <p>The returned string can be inserted into a valid XML 1.0 or XML 1.1
842      * document. If you want to allow more non-text characters in an XML 1.1
843      * document, use {@link #escapeXml11(String)}.</p>
844      *
845      * @param input  the {@code String} to escape, may be null
846      * @return a new escaped {@code String}, {@code null} if null string input
847      * @see #unescapeXml(String)
848      */
849     public static String escapeXml10(final String input) {
850         return ESCAPE_XML10.translate(input);
851     }
852 
853     /**
854      * <p>Escapes the characters in a {@code String} using XML entities.</p>
855      *
856      * <p>For example: {@code "bread" & "butter"} becomes
857      * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
858      * </p>
859      *
860      * <p>XML 1.1 can represent certain control characters, but it cannot represent
861      * the null byte or unpaired Unicode surrogate codepoints, even after escaping.
862      * {@code escapeXml11} will remove characters that do not fit in the following
863      * ranges:</p>
864      *
865      * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
866      *
867      * <p>{@code escapeXml11} will escape characters in the following ranges:</p>
868      *
869      * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p>
870      *
871      * <p>The returned string can be inserted into a valid XML 1.1 document. Do not
872      * use it for XML 1.0 documents.</p>
873      *
874      * @param input  the {@code String} to escape, may be null
875      * @return a new escaped {@code String}, {@code null} if null string input
876      * @see #unescapeXml(String)
877      */
878     public static String escapeXml11(final String input) {
879         return ESCAPE_XML11.translate(input);
880     }
881 
882     //-----------------------------------------------------------------------
883     /**
884      * <p>Unescapes a string containing XML entity escapes to a string
885      * containing the actual Unicode characters corresponding to the
886      * escapes.</p>
887      *
888      * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
889      * Does not support DTDs or external entities.</p>
890      *
891      * <p>Note that numerical \\u Unicode codes are unescaped to their respective
892      *    Unicode characters. This may change in future releases.</p>
893      *
894      * @param input  the {@code String} to unescape, may be null
895      * @return a new unescaped {@code String}, {@code null} if null string input
896      * @see #escapeXml10(String)
897      * @see #escapeXml11(String)
898      */
899     public static final String unescapeXml(final String input) {
900         return UNESCAPE_XML.translate(input); // NOTE(review): "numerical Unicode codes" above presumably means &#NNN; entities - confirm
901     }
902 
903     //-----------------------------------------------------------------------
904 
905     /**
906      * <p>Returns a {@code String} value for a CSV column enclosed in double quotes,
907      * if required.</p>
908      *
909      * <p>If the value contains a comma, newline or double quote, then the
910      *    String value is returned enclosed in double quotes.</p>
911      *
912      * <p>Any double quote characters in the value are escaped with another double quote.</p>
913      *
914      * <p>If the value does not contain a comma, newline or double quote, then the
915      *    String value is returned unchanged.</p>
916      *
917      * @param input the input CSV column String, may be null
918      * @return the input String, enclosed in double quotes if the value contains a comma,
919      * newline or double quote, {@code null} if null string input
920      *
921      * @see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a>
922      * @see <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>
923      */
924     public static final String escapeCsv(final String input) {
925         return ESCAPE_CSV.translate(input);
926     }
927 
928     /**
929      * <p>Returns a {@code String} value for an unescaped CSV column.</p>
930      *
931      * <p>If the value is enclosed in double quotes, and contains a comma, newline
932      *    or double quote, then quotes are removed.
933      * </p>
934      *
935      * <p>Any double quote escaped characters (a pair of double quotes) are unescaped
936      *    to just one double quote.</p>
937      *
938      * <p>If the value is not enclosed in double quotes, or is and does not contain a
939      *    comma, newline or double quote, then the String value is returned unchanged.</p>
940      *
941      * @param input the input CSV column String, may be null
942      * @return the input String, with enclosing double quotes removed and embedded double
943      * quotes unescaped, {@code null} if null string input
944      *
945      * @see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a>
946      * @see <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>
947      */
948     public static final String unescapeCsv(final String input) {
949         return UNESCAPE_CSV.translate(input);
950     }
951 
952     /**
953      * <p>Escapes the characters in a {@code String} using XSI rules.</p>
954      *
955      * <p><b>Beware!</b> In most cases you don't want to escape shell commands but use multi-argument
956      * methods provided by {@link java.lang.ProcessBuilder} or {@link java.lang.Runtime#exec(String[])}
957      * instead.</p>
958      *
959      * <p>Example:</p>
960      * <pre>
961      * input string: He didn't say, "Stop!"
962      * output string: He\ didn\'t\ say,\ \"Stop!\"
963      * </pre>
964      *
965      * @param input  String to escape values in, may be null
966      * @return String with escaped values, {@code null} if null string input
967      * @see <a href="http://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a>
968      */
969     public static final String escapeXSI(final String input) {
970         return ESCAPE_XSI.translate(input);
971     }
972 
973     /**
974      * <p>Unescapes the characters in a {@code String} using XSI rules.</p>
975      *
976      * @param input  the {@code String} to unescape, may be null
977      * @return a new unescaped {@code String}, {@code null} if null string input
978      * @see #escapeXSI(String)
979      */
980     public static final String unescapeXSI(final String input) {
981         return UNESCAPE_XSI.translate(input);
982     }
983 
984 }