001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.text;
018
019import java.io.IOException;
020import java.io.Writer;
021import java.util.Collections;
022import java.util.HashMap;
023import java.util.Map;
024
025import org.apache.commons.lang3.StringUtils;
026import org.apache.commons.text.translate.AggregateTranslator;
027import org.apache.commons.text.translate.CharSequenceTranslator;
028import org.apache.commons.text.translate.CsvTranslators;
029import org.apache.commons.text.translate.EntityArrays;
030import org.apache.commons.text.translate.JavaUnicodeEscaper;
031import org.apache.commons.text.translate.LookupTranslator;
032import org.apache.commons.text.translate.NumericEntityEscaper;
033import org.apache.commons.text.translate.NumericEntityUnescaper;
034import org.apache.commons.text.translate.OctalUnescaper;
035import org.apache.commons.text.translate.UnicodeUnescaper;
036import org.apache.commons.text.translate.UnicodeUnpairedSurrogateRemover;
037
038/**
039 * <p>
040 * Escapes and unescapes {@code String}s for Java, Java Script, HTML and XML.
041 * </p>
042 *
043 * <p>
044 * #ThreadSafe#
045 * </p>
046 *
047 * <p>
048 * This code has been adapted from Apache Commons Lang 3.5.
049 * </p>
050 *
051 * @since 1.0
052 */
053public class StringEscapeUtils {
054
055    /* ESCAPE TRANSLATORS */
056
057    /**
058     * Translator object for escaping Java.
059     *
060     * While {@link #escapeJava(String)} is the expected method of use, this
061     * object allows the Java escaping functionality to be used
062     * as the foundation for a custom translator.
063     */
064    public static final CharSequenceTranslator ESCAPE_JAVA;
065    static {
066        final Map<CharSequence, CharSequence> escapeJavaMap = new HashMap<>();
067        escapeJavaMap.put("\"", "\\\"");
068        escapeJavaMap.put("\\", "\\\\");
069        ESCAPE_JAVA = new AggregateTranslator(
070                new LookupTranslator(Collections.unmodifiableMap(escapeJavaMap)),
071                new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE),
072                JavaUnicodeEscaper.outsideOf(32, 0x7f)
073        );
074    }
075
076    /**
077     * Translator object for escaping EcmaScript/JavaScript.
078     *
079     * While {@link #escapeEcmaScript(String)} is the expected method of use, this
080     * object allows the EcmaScript escaping functionality to be used
081     * as the foundation for a custom translator.
082     */
083    public static final CharSequenceTranslator ESCAPE_ECMASCRIPT;
084    static {
085        final Map<CharSequence, CharSequence> escapeEcmaScriptMap = new HashMap<>();
086        escapeEcmaScriptMap.put("'", "\\'");
087        escapeEcmaScriptMap.put("\"", "\\\"");
088        escapeEcmaScriptMap.put("\\", "\\\\");
089        escapeEcmaScriptMap.put("/", "\\/");
090        ESCAPE_ECMASCRIPT = new AggregateTranslator(
091                new LookupTranslator(Collections.unmodifiableMap(escapeEcmaScriptMap)),
092                new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE),
093                JavaUnicodeEscaper.outsideOf(32, 0x7f)
094        );
095    }
096
097    /**
098     * Translator object for escaping Json.
099     *
100     * While {@link #escapeJson(String)} is the expected method of use, this
101     * object allows the Json escaping functionality to be used
102     * as the foundation for a custom translator.
103     */
104    public static final CharSequenceTranslator ESCAPE_JSON;
105    static {
106        final Map<CharSequence, CharSequence> escapeJsonMap = new HashMap<>();
107        escapeJsonMap.put("\"", "\\\"");
108        escapeJsonMap.put("\\", "\\\\");
109        escapeJsonMap.put("/", "\\/");
110        ESCAPE_JSON = new AggregateTranslator(
111                new LookupTranslator(Collections.unmodifiableMap(escapeJsonMap)),
112                new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE),
113                JavaUnicodeEscaper.outsideOf(32, 0x7e)
114        );
115    }
116
117    /**
118     * Translator object for escaping XML 1.0.
119     *
120     * While {@link #escapeXml10(String)} is the expected method of use, this
121     * object allows the XML escaping functionality to be used
122     * as the foundation for a custom translator.
123     */
124    public static final CharSequenceTranslator ESCAPE_XML10;
125    static {
126        final Map<CharSequence, CharSequence> escapeXml10Map = new HashMap<>();
127        escapeXml10Map.put("\u0000", StringUtils.EMPTY);
128        escapeXml10Map.put("\u0001", StringUtils.EMPTY);
129        escapeXml10Map.put("\u0002", StringUtils.EMPTY);
130        escapeXml10Map.put("\u0003", StringUtils.EMPTY);
131        escapeXml10Map.put("\u0004", StringUtils.EMPTY);
132        escapeXml10Map.put("\u0005", StringUtils.EMPTY);
133        escapeXml10Map.put("\u0006", StringUtils.EMPTY);
134        escapeXml10Map.put("\u0007", StringUtils.EMPTY);
135        escapeXml10Map.put("\u0008", StringUtils.EMPTY);
136        escapeXml10Map.put("\u000b", StringUtils.EMPTY);
137        escapeXml10Map.put("\u000c", StringUtils.EMPTY);
138        escapeXml10Map.put("\u000e", StringUtils.EMPTY);
139        escapeXml10Map.put("\u000f", StringUtils.EMPTY);
140        escapeXml10Map.put("\u0010", StringUtils.EMPTY);
141        escapeXml10Map.put("\u0011", StringUtils.EMPTY);
142        escapeXml10Map.put("\u0012", StringUtils.EMPTY);
143        escapeXml10Map.put("\u0013", StringUtils.EMPTY);
144        escapeXml10Map.put("\u0014", StringUtils.EMPTY);
145        escapeXml10Map.put("\u0015", StringUtils.EMPTY);
146        escapeXml10Map.put("\u0016", StringUtils.EMPTY);
147        escapeXml10Map.put("\u0017", StringUtils.EMPTY);
148        escapeXml10Map.put("\u0018", StringUtils.EMPTY);
149        escapeXml10Map.put("\u0019", StringUtils.EMPTY);
150        escapeXml10Map.put("\u001a", StringUtils.EMPTY);
151        escapeXml10Map.put("\u001b", StringUtils.EMPTY);
152        escapeXml10Map.put("\u001c", StringUtils.EMPTY);
153        escapeXml10Map.put("\u001d", StringUtils.EMPTY);
154        escapeXml10Map.put("\u001e", StringUtils.EMPTY);
155        escapeXml10Map.put("\u001f", StringUtils.EMPTY);
156        escapeXml10Map.put("\ufffe", StringUtils.EMPTY);
157        escapeXml10Map.put("\uffff", StringUtils.EMPTY);
158        ESCAPE_XML10 = new AggregateTranslator(
159                new LookupTranslator(EntityArrays.BASIC_ESCAPE),
160                new LookupTranslator(EntityArrays.APOS_ESCAPE),
161                new LookupTranslator(Collections.unmodifiableMap(escapeXml10Map)),
162                NumericEntityEscaper.between(0x7f, 0x84),
163                NumericEntityEscaper.between(0x86, 0x9f),
164                new UnicodeUnpairedSurrogateRemover()
165        );
166    }
167
168    /**
169     * Translator object for escaping XML 1.1.
170     *
171     * While {@link #escapeXml11(String)} is the expected method of use, this
172     * object allows the XML escaping functionality to be used
173     * as the foundation for a custom translator.
174     */
175    public static final CharSequenceTranslator ESCAPE_XML11;
176    static {
177        final Map<CharSequence, CharSequence> escapeXml11Map = new HashMap<>();
178        escapeXml11Map.put("\u0000", StringUtils.EMPTY);
179        escapeXml11Map.put("\u000b", "&#11;");
180        escapeXml11Map.put("\u000c", "&#12;");
181        escapeXml11Map.put("\ufffe", StringUtils.EMPTY);
182        escapeXml11Map.put("\uffff", StringUtils.EMPTY);
183        ESCAPE_XML11 = new AggregateTranslator(
184                new LookupTranslator(EntityArrays.BASIC_ESCAPE),
185                new LookupTranslator(EntityArrays.APOS_ESCAPE),
186                new LookupTranslator(Collections.unmodifiableMap(escapeXml11Map)),
187                NumericEntityEscaper.between(0x1, 0x8),
188                NumericEntityEscaper.between(0xe, 0x1f),
189                NumericEntityEscaper.between(0x7f, 0x84),
190                NumericEntityEscaper.between(0x86, 0x9f),
191                new UnicodeUnpairedSurrogateRemover()
192        );
193    }
194
195    /**
196     * Translator object for escaping HTML version 3.0.
197     *
198     * While {@link #escapeHtml3(String)} is the expected method of use, this
199     * object allows the HTML escaping functionality to be used
200     * as the foundation for a custom translator.
201     */
202    public static final CharSequenceTranslator ESCAPE_HTML3 =
203            new AggregateTranslator(
204                    new LookupTranslator(EntityArrays.BASIC_ESCAPE),
205                    new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE)
206            );
207
208    /**
209     * Translator object for escaping HTML version 4.0.
210     *
211     * While {@link #escapeHtml4(String)} is the expected method of use, this
212     * object allows the HTML escaping functionality to be used
213     * as the foundation for a custom translator.
214     */
215    public static final CharSequenceTranslator ESCAPE_HTML4 =
216            new AggregateTranslator(
217                    new LookupTranslator(EntityArrays.BASIC_ESCAPE),
218                    new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE),
219                    new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE)
220            );
221
222    /**
223     * Translator object for escaping individual Comma Separated Values.
224     *
225     * While {@link #escapeCsv(String)} is the expected method of use, this
226     * object allows the CSV escaping functionality to be used
227     * as the foundation for a custom translator.
228     */
229    public static final CharSequenceTranslator ESCAPE_CSV = new CsvTranslators.CsvEscaper();
230
231    /**
232     * Translator object for escaping Shell command language.
233     *
234     * @see <a href="http://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a>
235     */
236    public static final CharSequenceTranslator ESCAPE_XSI;
237    static {
238        final Map<CharSequence, CharSequence> escapeXsiMap = new HashMap<>();
239        escapeXsiMap.put("|", "\\|");
240        escapeXsiMap.put("&", "\\&");
241        escapeXsiMap.put(";", "\\;");
242        escapeXsiMap.put("<", "\\<");
243        escapeXsiMap.put(">", "\\>");
244        escapeXsiMap.put("(", "\\(");
245        escapeXsiMap.put(")", "\\)");
246        escapeXsiMap.put("$", "\\$");
247        escapeXsiMap.put("`", "\\`");
248        escapeXsiMap.put("\\", "\\\\");
249        escapeXsiMap.put("\"", "\\\"");
250        escapeXsiMap.put("'", "\\'");
251        escapeXsiMap.put(" ", "\\ ");
252        escapeXsiMap.put("\t", "\\\t");
253        escapeXsiMap.put("\r\n", StringUtils.EMPTY);
254        escapeXsiMap.put("\n", StringUtils.EMPTY);
255        escapeXsiMap.put("*", "\\*");
256        escapeXsiMap.put("?", "\\?");
257        escapeXsiMap.put("[", "\\[");
258        escapeXsiMap.put("#", "\\#");
259        escapeXsiMap.put("~", "\\~");
260        escapeXsiMap.put("=", "\\=");
261        escapeXsiMap.put("%", "\\%");
262        ESCAPE_XSI = new LookupTranslator(
263                Collections.unmodifiableMap(escapeXsiMap)
264        );
265    }
266
267    /* UNESCAPE TRANSLATORS */
268
269    /**
270     * Translator object for unescaping escaped Java.
271     *
272     * While {@link #unescapeJava(String)} is the expected method of use, this
273     * object allows the Java unescaping functionality to be used
274     * as the foundation for a custom translator.
275     */
276    public static final CharSequenceTranslator UNESCAPE_JAVA;
277    static {
278        final Map<CharSequence, CharSequence> unescapeJavaMap = new HashMap<>();
279        unescapeJavaMap.put("\\\\", "\\");
280        unescapeJavaMap.put("\\\"", "\"");
281        unescapeJavaMap.put("\\'", "'");
282        unescapeJavaMap.put("\\", StringUtils.EMPTY);
283        UNESCAPE_JAVA = new AggregateTranslator(
284                new OctalUnescaper(),     // .between('\1', '\377'),
285                new UnicodeUnescaper(),
286                new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE),
287                new LookupTranslator(Collections.unmodifiableMap(unescapeJavaMap))
288        );
289    }
290
291    /**
292     * Translator object for unescaping escaped EcmaScript.
293     *
294     * While {@link #unescapeEcmaScript(String)} is the expected method of use, this
295     * object allows the EcmaScript unescaping functionality to be used
296     * as the foundation for a custom translator.
297     */
298    public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA;
299
300    /**
301     * Translator object for unescaping escaped Json.
302     *
303     * While {@link #unescapeJson(String)} is the expected method of use, this
304     * object allows the Json unescaping functionality to be used
305     * as the foundation for a custom translator.
306     */
307    public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA;
308
309    /**
310     * Translator object for unescaping escaped HTML 3.0.
311     *
312     * While {@link #unescapeHtml3(String)} is the expected method of use, this
313     * object allows the HTML unescaping functionality to be used
314     * as the foundation for a custom translator.
315     */
316    public static final CharSequenceTranslator UNESCAPE_HTML3 =
317            new AggregateTranslator(
318                    new LookupTranslator(EntityArrays.BASIC_UNESCAPE),
319                    new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE),
320                    new NumericEntityUnescaper()
321            );
322
323    /**
324     * Translator object for unescaping escaped HTML 4.0.
325     *
326     * While {@link #unescapeHtml4(String)} is the expected method of use, this
327     * object allows the HTML unescaping functionality to be used
328     * as the foundation for a custom translator.
329     */
330    public static final CharSequenceTranslator UNESCAPE_HTML4 =
331            new AggregateTranslator(
332                    new LookupTranslator(EntityArrays.BASIC_UNESCAPE),
333                    new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE),
334                    new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE),
335                    new NumericEntityUnescaper()
336            );
337
338    /**
339     * Translator object for unescaping escaped XML.
340     *
341     * While {@link #unescapeXml(String)} is the expected method of use, this
342     * object allows the XML unescaping functionality to be used
343     * as the foundation for a custom translator.
344     */
345    public static final CharSequenceTranslator UNESCAPE_XML =
346            new AggregateTranslator(
347                    new LookupTranslator(EntityArrays.BASIC_UNESCAPE),
348                    new LookupTranslator(EntityArrays.APOS_UNESCAPE),
349                    new NumericEntityUnescaper()
350            );
351
352    /**
353     * Translator object for unescaping escaped Comma Separated Value entries.
354     *
355     * While {@link #unescapeCsv(String)} is the expected method of use, this
356     * object allows the CSV unescaping functionality to be used
357     * as the foundation for a custom translator.
358     */
359    public static final CharSequenceTranslator UNESCAPE_CSV = new CsvTranslators.CsvUnescaper();
360
361    /**
362     * Translator object for unescaping escaped XSI Value entries.
363     *
364     * While {@link #unescapeXSI(String)}  is the expected method of use, this
365     * object allows the XSI unescaping functionality to be used
366     * as the foundation for a custom translator.
367     */
368    public static final CharSequenceTranslator UNESCAPE_XSI = new XsiUnescaper();
369
370    /**
371     * Translator object for unescaping backslash escaped entries.
372     */
373    static class XsiUnescaper extends CharSequenceTranslator {
374
375        /**
376         * Escaped backslash constant.
377         */
378        private static final char BACKSLASH = '\\';
379
380        @Override
381        public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
382
383            if (index != 0) {
384                throw new IllegalStateException("XsiUnescaper should never reach the [1] index");
385            }
386
387            final String s = input.toString();
388
389            int segmentStart = 0;
390            int searchOffset = 0;
391            while (true) {
392                final int pos = s.indexOf(BACKSLASH, searchOffset);
393                if (pos == -1) {
394                    if (segmentStart < s.length()) {
395                        out.write(s.substring(segmentStart));
396                    }
397                    break;
398                }
399                if (pos > segmentStart) {
400                    out.write(s.substring(segmentStart, pos));
401                }
402                segmentStart = pos + 1;
403                searchOffset = pos + 2;
404            }
405
406            return Character.codePointCount(input, 0, input.length());
407        }
408    }
409
410    /* Helper functions */
411
412    /**
413     * <p>{@code StringEscapeUtils} instances should NOT be constructed in
414     * standard programming.</p>
415     *
416     * <p>Instead, the class should be used as:</p>
417     * <pre>StringEscapeUtils.escapeJava("foo");</pre>
418     *
419     * <p>This constructor is public to permit tools that require a JavaBean
420     * instance to operate.</p>
421     */
422    public StringEscapeUtils() {
423        super();
424    }
425
426    /**
427     * <p>Convenience wrapper for {@link java.lang.StringBuilder} providing escape methods.</p>
428     *
429     * <p>Example:</p>
430     * <pre>
431     * new Builder(ESCAPE_HTML4)
432     *      .append("&lt;p&gt;")
433     *      .escape("This is paragraph 1 and special chars like &amp; get escaped.")
434     *      .append("&lt;/p&gt;&lt;p&gt;")
435     *      .escape("This is paragraph 2 &amp; more...")
436     *      .append("&lt;/p&gt;")
437     *      .toString()
438     * </pre>
439     *
440     */
441    public static final class Builder {
442
443        /**
444         * StringBuilder to be used in the Builder class.
445         */
446        private final StringBuilder sb;
447
448        /**
449         * CharSequenceTranslator to be used in the Builder class.
450         */
451        private final CharSequenceTranslator translator;
452
453        /**
454         * Builder constructor.
455         *
456         * @param translator a CharSequenceTranslator.
457         */
458        private Builder(final CharSequenceTranslator translator) {
459            this.sb = new StringBuilder();
460            this.translator = translator;
461        }
462
463        /**
464         * <p>Escape {@code input} according to the given {@link CharSequenceTranslator}.</p>
465         *
466         * @param input the String to escape
467         * @return {@code this}, to enable chaining
468         */
469        public Builder escape(final String input) {
470            sb.append(translator.translate(input));
471            return this;
472        }
473
474        /**
475         * Literal append, no escaping being done.
476         *
477         * @param input the String to append
478         * @return {@code this}, to enable chaining
479         */
480        public Builder append(final String input) {
481            sb.append(input);
482            return this;
483        }
484
485        /**
486         * <p>Return the escaped string.</p>
487         *
488         * @return The escaped string
489         */
490        @Override
491        public String toString() {
492            return sb.toString();
493        }
494    }
495
496    /**
497     * Get a {@link Builder}.
498     * @param translator the text translator
499     * @return {@link Builder}
500     */
501    public static StringEscapeUtils.Builder builder(final CharSequenceTranslator translator) {
502        return new Builder(translator);
503    }
504
505    // Java and JavaScript
506    //--------------------------------------------------------------------------
507    /**
508     * <p>Escapes the characters in a {@code String} using Java String rules.</p>
509     *
510     * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
511     *
512     * <p>So a tab becomes the characters {@code '\\'} and
513     * {@code 't'}.</p>
514     *
515     * <p>The only difference between Java strings and JavaScript strings
516     * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p>
517     *
518     * <p>Example:</p>
519     * <pre>
520     * input string: He didn't say, "Stop!"
521     * output string: He didn't say, \"Stop!\"
522     * </pre>
523     *
524     * @param input  String to escape values in, may be null
525     * @return String with escaped values, {@code null} if null string input
526     */
527    public static final String escapeJava(final String input) {
528        return ESCAPE_JAVA.translate(input);
529    }
530
531    /**
532     * <p>Escapes the characters in a {@code String} using EcmaScript String rules.</p>
533     * <p>Escapes any values it finds into their EcmaScript String form.
534     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
535     *
536     * <p>So a tab becomes the characters {@code '\\'} and
537     * {@code 't'}.</p>
538     *
539     * <p>The only difference between Java strings and EcmaScript strings
540     * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p>
541     *
542     * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects.</p>
543     *
544     * <p>Example:</p>
545     * <pre>
546     * input string: He didn't say, "Stop!"
547     * output string: He didn\'t say, \"Stop!\"
548     * </pre>
549     *
550     * <b>Security Note.</b> We only provide backslash escaping in this method. For example, {@code '\"'} has the output
551     * {@code '\\\"'} which could result in potential issues in the case where the string being escaped is being used
552     * in an HTML tag like {@code <select onmouseover="..." />}. If you wish to have more rigorous string escaping, you
553     * may consider the
554     * <a href="https://www.owasp.org/index.php/Category:OWASP_Enterprise_Security_API_JAVA">ESAPI Libraries</a>.
555     * Further, you can view the <a href="https://github.com/esapi">ESAPI GitHub Org</a>.
556     *
557     * @param input  String to escape values in, may be null
558     * @return String with escaped values, {@code null} if null string input
559     */
560    public static final String escapeEcmaScript(final String input) {
561        return ESCAPE_ECMASCRIPT.translate(input);
562    }
563
564    /**
565     * <p>Escapes the characters in a {@code String} using Json String rules.</p>
566     * <p>Escapes any values it finds into their Json String form.
567     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
568     *
569     * <p>So a tab becomes the characters {@code '\\'} and
570     * {@code 't'}.</p>
571     *
572     * <p>The only difference between Java strings and Json strings
573     * is that in Json, forward-slash (/) is escaped.</p>
574     *
575     * <p>See http://www.ietf.org/rfc/rfc4627.txt for further details.</p>
576     *
577     * <p>Example:</p>
578     * <pre>
579     * input string: He didn't say, "Stop!"
580     * output string: He didn't say, \"Stop!\"
581     * </pre>
582     *
583     * @param input  String to escape values in, may be null
584     * @return String with escaped values, {@code null} if null string input
585     */
586    public static final String escapeJson(final String input) {
587        return ESCAPE_JSON.translate(input);
588    }
589
590    /**
591     * <p>Unescapes any Java literals found in the {@code String}.
592     * For example, it will turn a sequence of {@code '\'} and
593     * {@code 'n'} into a newline character, unless the {@code '\'}
594     * is preceded by another {@code '\'}.</p>
595     *
596     * @param input  the {@code String} to unescape, may be null
597     * @return a new unescaped {@code String}, {@code null} if null string input
598     */
599    public static final String unescapeJava(final String input) {
600        return UNESCAPE_JAVA.translate(input);
601    }
602
603    /**
604     * <p>Unescapes any EcmaScript literals found in the {@code String}.</p>
605     *
606     * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
607     * into a newline character, unless the {@code '\'} is preceded by another
608     * {@code '\'}.</p>
609     *
610     * @see #unescapeJava(String)
611     * @param input  the {@code String} to unescape, may be null
612     * @return A new unescaped {@code String}, {@code null} if null string input
613     */
614    public static final String unescapeEcmaScript(final String input) {
615        return UNESCAPE_ECMASCRIPT.translate(input);
616    }
617
618    /**
619     * <p>Unescapes any Json literals found in the {@code String}.</p>
620     *
621     * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
622     * into a newline character, unless the {@code '\'} is preceded by another
623     * {@code '\'}.</p>
624     *
625     * @see #unescapeJava(String)
626     * @param input  the {@code String} to unescape, may be null
627     * @return A new unescaped {@code String}, {@code null} if null string input
628     */
629    public static final String unescapeJson(final String input) {
630        return UNESCAPE_JSON.translate(input);
631    }
632
633    // HTML and XML
634    //--------------------------------------------------------------------------
635    /**
636     * <p>Escapes the characters in a {@code String} using HTML entities.</p>
637     *
638     * <p>
639     * For example:
640     * </p>
641     * <p>{@code "bread" &amp; "butter"}</p>
642     * becomes:
643     * <p>
644     * {@code &amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;}.
645     * </p>
646     *
647     * <p>Supports all known HTML 4.0 entities, including funky accents.
648     * Note that the commonly used apostrophe escape character (&amp;apos;)
649     * is not a legal entity and so is not supported).</p>
650     *
651     * @param input  the {@code String} to escape, may be null
652     * @return a new escaped {@code String}, {@code null} if null string input
653     *
654     * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
655     * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
656     * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
657     * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
658     * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
659     */
660    public static final String escapeHtml4(final String input) {
661        return ESCAPE_HTML4.translate(input);
662    }
663
664    /**
665     * <p>Escapes the characters in a {@code String} using HTML entities.</p>
666     * <p>Supports only the HTML 3.0 entities.</p>
667     *
668     * @param input  the {@code String} to escape, may be null
669     * @return a new escaped {@code String}, {@code null} if null string input
670     */
671    public static final String escapeHtml3(final String input) {
672        return ESCAPE_HTML3.translate(input);
673    }
674
675    //-----------------------------------------------------------------------
676    /**
677     * <p>Unescapes a string containing entity escapes to a string
678     * containing the actual Unicode characters corresponding to the
679     * escapes. Supports HTML 4.0 entities.</p>
680     *
681     * <p>For example, the string {@code "&lt;Fran&ccedil;ais&gt;"}
682     * will become {@code "<Fran�ais>"}</p>
683     *
684     * <p>If an entity is unrecognized, it is left alone, and inserted
685     * verbatim into the result string. e.g. {@code "&gt;&zzzz;x"} will
686     * become {@code ">&zzzz;x"}.</p>
687     *
688     * @param input  the {@code String} to unescape, may be null
689     * @return a new unescaped {@code String}, {@code null} if null string input
690     */
691    public static final String unescapeHtml4(final String input) {
692        return UNESCAPE_HTML4.translate(input);
693    }
694
695    /**
696     * <p>Unescapes a string containing entity escapes to a string
697     * containing the actual Unicode characters corresponding to the
698     * escapes. Supports only HTML 3.0 entities.</p>
699     *
700     * @param input  the {@code String} to unescape, may be null
701     * @return a new unescaped {@code String}, {@code null} if null string input
702     */
703    public static final String unescapeHtml3(final String input) {
704        return UNESCAPE_HTML3.translate(input);
705    }
706
707    /**
708     * <p>Escapes the characters in a {@code String} using XML entities.</p>
709     *
710     * <p>For example: {@code "bread" & "butter"} =&gt;
711     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
712     * </p>
713     *
714     * <p>Note that XML 1.0 is a text-only format: it cannot represent control
715     * characters or unpaired Unicode surrogate codepoints, even after escaping.
716     * {@code escapeXml10} will remove characters that do not fit in the
717     * following ranges:</p>
718     *
719     * <p>{@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
720     *
721     * <p>Though not strictly necessary, {@code escapeXml10} will escape
722     * characters in the following ranges:</p>
723     *
724     * <p>{@code [#x7F-#x84] | [#x86-#x9F]}</p>
725     *
726     * <p>The returned string can be inserted into a valid XML 1.0 or XML 1.1
727     * document. If you want to allow more non-text characters in an XML 1.1
728     * document, use {@link #escapeXml11(String)}.</p>
729     *
730     * @param input  the {@code String} to escape, may be null
731     * @return a new escaped {@code String}, {@code null} if null string input
732     * @see #unescapeXml(java.lang.String)
733     */
734    public static String escapeXml10(final String input) {
735        return ESCAPE_XML10.translate(input);
736    }
737
738    /**
739     * <p>Escapes the characters in a {@code String} using XML entities.</p>
740     *
741     * <p>For example: {@code "bread" & "butter"} =&gt;
742     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
743     * </p>
744     *
745     * <p>XML 1.1 can represent certain control characters, but it cannot represent
746     * the null byte or unpaired Unicode surrogate codepoints, even after escaping.
747     * {@code escapeXml11} will remove characters that do not fit in the following
748     * ranges:</p>
749     *
750     * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
751     *
752     * <p>{@code escapeXml11} will escape characters in the following ranges:</p>
753     *
754     * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p>
755     *
756     * <p>The returned string can be inserted into a valid XML 1.1 document. Do not
757     * use it for XML 1.0 documents.</p>
758     *
759     * @param input  the {@code String} to escape, may be null
760     * @return a new escaped {@code String}, {@code null} if null string input
761     * @see #unescapeXml(java.lang.String)
762     */
763    public static String escapeXml11(final String input) {
764        return ESCAPE_XML11.translate(input);
765    }
766
767    //-----------------------------------------------------------------------
768    /**
769     * <p>Unescapes a string containing XML entity escapes to a string
770     * containing the actual Unicode characters corresponding to the
771     * escapes.</p>
772     *
773     * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
774     * Does not support DTDs or external entities.</p>
775     *
776     * <p>Note that numerical \\u Unicode codes are unescaped to their respective
777     *    Unicode characters. This may change in future releases.</p>
778     *
779     * @param input  the {@code String} to unescape, may be null
780     * @return a new unescaped {@code String}, {@code null} if null string input
781     * @see #escapeXml10(String)
782     * @see #escapeXml11(String)
783     */
784    public static final String unescapeXml(final String input) {
785        return UNESCAPE_XML.translate(input);
786    }
787
788    //-----------------------------------------------------------------------
789
790    /**
791     * <p>Returns a {@code String} value for a CSV column enclosed in double quotes,
792     * if required.</p>
793     *
794     * <p>If the value contains a comma, newline or double quote, then the
795     *    String value is returned enclosed in double quotes.</p>
796     *
797     * <p>Any double quote characters in the value are escaped with another double quote.</p>
798     *
799     * <p>If the value does not contain a comma, newline or double quote, then the
800     *    String value is returned unchanged.</p>
801     *
802     * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
803     * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
804     *
805     * @param input the input CSV column String, may be null
806     * @return The input String, enclosed in double quotes if the value contains a comma,
807     * newline or double quote, {@code null} if null string input
808     */
809    public static final String escapeCsv(final String input) {
810        return ESCAPE_CSV.translate(input);
811    }
812
813    /**
814     * <p>Returns a {@code String} value for an unescaped CSV column.</p>
815     *
816     * <p>If the value is enclosed in double quotes, and contains a comma, newline
817     *    or double quote, then quotes are removed.
818     * </p>
819     *
820     * <p>Any double quote escaped characters (a pair of double quotes) are unescaped
821     *    to just one double quote.</p>
822     *
823     * <p>If the value is not enclosed in double quotes, or is and does not contain a
824     *    comma, newline or double quote, then the String value is returned unchanged.</p>
825     *
826     * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
827     * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
828     *
829     * @param input the input CSV column String, may be null
830     * @return The input String, with enclosing double quotes removed and embedded double
831     * quotes unescaped, {@code null} if null string input
832     */
833    public static final String unescapeCsv(final String input) {
834        return UNESCAPE_CSV.translate(input);
835    }
836
837    /**
838     * <p>Escapes the characters in a {@code String} using XSI rules.</p>
839     *
840     * <p><b>Beware!</b> In most cases you don't want to escape shell commands but use multi-argument
841     * methods provided by {@link java.lang.ProcessBuilder} or {@link java.lang.Runtime#exec(String[])}
842     * instead.</p>
843     *
844     * <p>Example:</p>
845     * <pre>
846     * input string: He didn't say, "Stop!"
847     * output string: He\ didn\'t\ say,\ \"Stop!\"
848     * </pre>
849     *
850     * @see <a href="http://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a>
851     * @param input  String to escape values in, may be null
852     * @return String with escaped values, {@code null} if null string input
853     */
854    public static final String escapeXSI(final String input) {
855        return ESCAPE_XSI.translate(input);
856    }
857
858    /**
859     * <p>Unescapes the characters in a {@code String} using XSI rules.</p>
860     *
861     * @see StringEscapeUtils#escapeXSI(String)
862     * @param input  the {@code String} to unescape, may be null
863     * @return a new unescaped {@code String}, {@code null} if null string input
864     */
865    public static final String unescapeXSI(final String input) {
866        return UNESCAPE_XSI.translate(input);
867    }
868
869}