001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 * 
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 * 
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.text;
018
019import org.apache.commons.lang3.CharUtils;
020import org.apache.commons.lang3.StringUtils;
021import org.apache.commons.text.translate.AggregateTranslator;
022import org.apache.commons.text.translate.CharSequenceTranslator;
023import org.apache.commons.text.translate.EntityArrays;
024import org.apache.commons.text.translate.JavaUnicodeEscaper;
025import org.apache.commons.text.translate.LookupTranslator;
026import org.apache.commons.text.translate.NumericEntityEscaper;
027import org.apache.commons.text.translate.NumericEntityUnescaper;
028import org.apache.commons.text.translate.OctalUnescaper;
029import org.apache.commons.text.translate.SingleLookupTranslator;
030import org.apache.commons.text.translate.UnicodeUnescaper;
031import org.apache.commons.text.translate.UnicodeUnpairedSurrogateRemover;
032
033import java.io.IOException;
034import java.io.Writer;
035
036/**
 * <p>Escapes and unescapes {@code String}s for
 * Java, JavaScript (EcmaScript), JSON, HTML, XML, CSV and the XSI shell command language.</p>
039 *
040 * <p>#ThreadSafe#</p>
041 *
042 *
043 * <p>
044 * This code has been adapted from Apache Commons Lang 3.5.
045 * </p>
046 *
047 * @since 1.0
048 */
049public class StringEscapeUtils {
050
051    /* ESCAPE TRANSLATORS */
052
    /**
     * Translator object for escaping Java.
     *
     * <p>While {@link #escapeJava(String)} is the expected method of use, this
     * object allows the Java escaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>It is built from three translators, in this order: a lookup that puts a
     * backslash in front of the double quote and the backslash, a lookup built from
     * {@code EntityArrays.JAVA_CTRL_CHARS_ESCAPE()}, and
     * {@code JavaUnicodeEscaper.outsideOf(32, 0x7f)}.</p>
     */
    public static final CharSequenceTranslator ESCAPE_JAVA =
            new LookupTranslator(
                    new String[][] {
                            {"\"", "\\\""},
                            {"\\", "\\\\"},
                    }).with(
                    new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE())
            ).with(
                    JavaUnicodeEscaper.outsideOf(32, 0x7f)
            );
070
    /**
     * Translator object for escaping EcmaScript/JavaScript.
     *
     * <p>While {@link #escapeEcmaScript(String)} is the expected method of use, this
     * object allows the EcmaScript escaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>It aggregates, in this order: a lookup that puts a backslash in front of the
     * single quote, the double quote, the backslash and the forward slash, a lookup
     * built from {@code EntityArrays.JAVA_CTRL_CHARS_ESCAPE()}, and
     * {@code JavaUnicodeEscaper.outsideOf(32, 0x7f)}.</p>
     */
    public static final CharSequenceTranslator ESCAPE_ECMASCRIPT =
            new AggregateTranslator(
                    new LookupTranslator(
                            new String[][] {
                                    {"'", "\\'"},
                                    {"\"", "\\\""},
                                    {"\\", "\\\\"},
                                    {"/", "\\/"}
                            }),
                    new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
                    JavaUnicodeEscaper.outsideOf(32, 0x7f)
            );
090
    /**
     * Translator object for escaping Json.
     *
     * <p>While {@link #escapeJson(String)} is the expected method of use, this
     * object allows the Json escaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>It aggregates, in this order: a lookup that puts a backslash in front of the
     * double quote, the backslash and the forward slash (the single quote is not in
     * the table), a lookup built from {@code EntityArrays.JAVA_CTRL_CHARS_ESCAPE()},
     * and {@code JavaUnicodeEscaper.outsideOf(32, 0x7f)}.</p>
     */
    public static final CharSequenceTranslator ESCAPE_JSON =
            new AggregateTranslator(
                    new LookupTranslator(
                            new String[][] {
                                    {"\"", "\\\""},
                                    {"\\", "\\\\"},
                                    {"/", "\\/"}
                            }),
                    new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
                    JavaUnicodeEscaper.outsideOf(32, 0x7f)
            );
109
    /**
     * Translator object for escaping XML 1.0.
     *
     * <p>While {@link #escapeXml10(String)} is the expected method of use, this
     * object allows the XML escaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>It aggregates, in this order: lookups built from
     * {@code EntityArrays.BASIC_ESCAPE()} and {@code EntityArrays.APOS_ESCAPE()},
     * a lookup that maps selected code points to the empty string (see the table
     * below), {@code NumericEntityEscaper.between(0x7f, 0x84)},
     * {@code NumericEntityEscaper.between(0x86, 0x9f)} and a
     * {@code UnicodeUnpairedSurrogateRemover}.</p>
     */
    public static final CharSequenceTranslator ESCAPE_XML10 =
            new AggregateTranslator(
                    new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
                    new LookupTranslator(EntityArrays.APOS_ESCAPE()),
                    // Code points replaced by the empty string: U+0000 to U+0008, U+000B, U+000C,
                    // U+000E to U+001F, U+FFFE and U+FFFF. U+0009 (tab), U+000A (line feed) and
                    // U+000D (carriage return) are deliberately absent from the table.
                    new LookupTranslator(
                            new String[][] {
                                    { "\u0000", StringUtils.EMPTY },
                                    { "\u0001", StringUtils.EMPTY },
                                    { "\u0002", StringUtils.EMPTY },
                                    { "\u0003", StringUtils.EMPTY },
                                    { "\u0004", StringUtils.EMPTY },
                                    { "\u0005", StringUtils.EMPTY },
                                    { "\u0006", StringUtils.EMPTY },
                                    { "\u0007", StringUtils.EMPTY },
                                    { "\u0008", StringUtils.EMPTY },
                                    { "\u000b", StringUtils.EMPTY },
                                    { "\u000c", StringUtils.EMPTY },
                                    { "\u000e", StringUtils.EMPTY },
                                    { "\u000f", StringUtils.EMPTY },
                                    { "\u0010", StringUtils.EMPTY },
                                    { "\u0011", StringUtils.EMPTY },
                                    { "\u0012", StringUtils.EMPTY },
                                    { "\u0013", StringUtils.EMPTY },
                                    { "\u0014", StringUtils.EMPTY },
                                    { "\u0015", StringUtils.EMPTY },
                                    { "\u0016", StringUtils.EMPTY },
                                    { "\u0017", StringUtils.EMPTY },
                                    { "\u0018", StringUtils.EMPTY },
                                    { "\u0019", StringUtils.EMPTY },
                                    { "\u001a", StringUtils.EMPTY },
                                    { "\u001b", StringUtils.EMPTY },
                                    { "\u001c", StringUtils.EMPTY },
                                    { "\u001d", StringUtils.EMPTY },
                                    { "\u001e", StringUtils.EMPTY },
                                    { "\u001f", StringUtils.EMPTY },
                                    { "\ufffe", StringUtils.EMPTY },
                                    { "\uffff", StringUtils.EMPTY }
                            }),
                    NumericEntityEscaper.between(0x7f, 0x84),
                    NumericEntityEscaper.between(0x86, 0x9f),
                    new UnicodeUnpairedSurrogateRemover()
            );
159
    /**
     * Translator object for escaping XML 1.1.
     *
     * <p>While {@link #escapeXml11(String)} is the expected method of use, this
     * object allows the XML escaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>It aggregates, in this order: lookups built from
     * {@code EntityArrays.BASIC_ESCAPE()} and {@code EntityArrays.APOS_ESCAPE()},
     * a lookup that maps U+0000, U+FFFE and U+FFFF to the empty string and
     * U+000B / U+000C to {@code &#11;} / {@code &#12;}, four
     * {@code NumericEntityEscaper.between(...)} ranges (0x1-0x8, 0xe-0x1f,
     * 0x7f-0x84, 0x86-0x9f) and a {@code UnicodeUnpairedSurrogateRemover}.</p>
     */
    public static final CharSequenceTranslator ESCAPE_XML11 =
            new AggregateTranslator(
                    new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
                    new LookupTranslator(EntityArrays.APOS_ESCAPE()),
                    new LookupTranslator(
                            new String[][] {
                                    { "\u0000", StringUtils.EMPTY },
                                    { "\u000b", "&#11;" },
                                    { "\u000c", "&#12;" },
                                    { "\ufffe", StringUtils.EMPTY },
                                    { "\uffff", StringUtils.EMPTY }
                            }),
                    NumericEntityEscaper.between(0x1, 0x8),
                    NumericEntityEscaper.between(0xe, 0x1f),
                    NumericEntityEscaper.between(0x7f, 0x84),
                    NumericEntityEscaper.between(0x86, 0x9f),
                    new UnicodeUnpairedSurrogateRemover()
            );
185
    /**
     * Translator object for escaping HTML version 3.0.
     *
     * <p>While {@link #escapeHtml3(String)} is the expected method of use, this
     * object allows the HTML escaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>It aggregates lookups built from {@code EntityArrays.BASIC_ESCAPE()} and
     * {@code EntityArrays.ISO8859_1_ESCAPE()}, in that order.</p>
     */
    public static final CharSequenceTranslator ESCAPE_HTML3 =
            new AggregateTranslator(
                    new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
                    new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE())
            );
198
    /**
     * The improved translator object for escaping HTML version 3.0.
     *
     * <p>The 'improved' part of this translator is that it checks whether the HTML is
     * already translated. This check prevents double, triple, or recursive translations.</p>
     *
     * <p>While {@link #escapeHtml3Once(String)} is the expected method of use, this
     * object allows the HTML escaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>Note that multiple lookup tables should be passed to a single
     * {@code SingleLookupTranslator} (as done here) rather than passing several
     * {@code SingleLookupTranslator} instances to an {@code AggregateTranslator},
     * because a {@code SingleLookupTranslator} only checks the values of the
     * lookup tables passed to that instance when deciding whether a value is
     * already translated.</p>
     */
    public static final CharSequenceTranslator ESCAPE_HTML3_ONCE =
            new SingleLookupTranslator(EntityArrays.BASIC_ESCAPE(), EntityArrays.ISO8859_1_ESCAPE());
216
217
    /**
     * Translator object for escaping HTML version 4.0.
     *
     * <p>While {@link #escapeHtml4(String)} is the expected method of use, this
     * object allows the HTML escaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>It aggregates lookups built from {@code EntityArrays.BASIC_ESCAPE()},
     * {@code EntityArrays.ISO8859_1_ESCAPE()} and
     * {@code EntityArrays.HTML40_EXTENDED_ESCAPE()}, in that order.</p>
     */
    public static final CharSequenceTranslator ESCAPE_HTML4 =
            new AggregateTranslator(
                    new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
                    new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()),
                    new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE())
            );
231
    /**
     * The improved translator object for escaping HTML version 4.0.
     *
     * <p>The 'improved' part of this translator is that it checks whether the HTML is
     * already translated. This check prevents double, triple, or recursive translations.</p>
     *
     * <p>While {@link #escapeHtml4Once(String)} is the expected method of use, this
     * object allows the HTML escaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>Note that multiple lookup tables should be passed to a single
     * {@code SingleLookupTranslator} (as done here) rather than passing several
     * {@code SingleLookupTranslator} instances to an {@code AggregateTranslator},
     * because a {@code SingleLookupTranslator} only checks the values of the
     * lookup tables passed to that instance when deciding whether a value is
     * already translated.</p>
     */
    public static final CharSequenceTranslator ESCAPE_HTML4_ONCE =
            new SingleLookupTranslator(EntityArrays.BASIC_ESCAPE(), EntityArrays.ISO8859_1_ESCAPE(), EntityArrays.HTML40_EXTENDED_ESCAPE());
249
    /**
     * Translator object for escaping individual Comma Separated Values.
     *
     * <p>While {@link #escapeCsv(String)} is the expected method of use, this
     * object allows the CSV escaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>The instance is a {@link CsvEscaper}, which handles the whole input in a
     * single call rather than character by character.</p>
     */
    public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper();
258
259    // TODO: Create a parent class - 'SinglePassTranslator' ?
260    //       It would handle the index checking + length returning, 
261    //       and could also have an optimization check method.
262    static class CsvEscaper extends CharSequenceTranslator {
263
264        private static final char CSV_DELIMITER = ',';
265        private static final char CSV_QUOTE = '"';
266        private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
267        private static final char[] CSV_SEARCH_CHARS =
268                new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};
269
270        @Override
271        public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
272
273            if(index != 0) {
274                throw new IllegalStateException("CsvEscaper should never reach the [1] index");
275            }
276
277            if (StringUtils.containsNone(input.toString(), CSV_SEARCH_CHARS)) {
278                out.write(input.toString());
279            } else {
280                out.write(CSV_QUOTE);
281                out.write(StringUtils.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR));
282                out.write(CSV_QUOTE);
283            }
284            return Character.codePointCount(input, 0, input.length());
285        }
286    }
287
    /**
     * Translator object for escaping Shell command language.
     *
     * <p>Each character in the table below is prefixed with a backslash, except for
     * the line terminators: a carriage-return/line-feed pair and a lone line feed are
     * mapped to the empty string, i.e. removed from the output.</p>
     *
     * @see <a href="http://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a>
     */
    public static final CharSequenceTranslator ESCAPE_XSI =
        new LookupTranslator(
            new String[][] {
                    {"|", "\\|"},
                    {"&", "\\&"},
                    {";", "\\;"},
                    {"<", "\\<"},
                    {">", "\\>"},
                    {"(", "\\("},
                    {")", "\\)"},
                    {"$", "\\$"},
                    {"`", "\\`"},
                    {"\\", "\\\\"},
                    {"\"", "\\\""},
                    {"'", "\\'"},
                    {" ", "\\ "},
                    {"\t", "\\\t"},
                    // line terminators are dropped rather than escaped
                    {"\r\n", ""},
                    {"\n", ""},
                    {"*", "\\*"},
                    {"?", "\\?"},
                    {"[", "\\["},
                    {"#", "\\#"},
                    {"~", "\\~"},
                    {"=", "\\="},
                    {"%", "\\%"},
            });
320
321    /* UNESCAPE TRANSLATORS */
322
    /**
     * Translator object for unescaping escaped Java.
     *
     * <p>While {@link #unescapeJava(String)} is the expected method of use, this
     * object allows the Java unescaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>It aggregates, in this order: an {@code OctalUnescaper}, a
     * {@code UnicodeUnescaper}, a lookup built from
     * {@code EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()}, and a lookup that turns an
     * escaped backslash, double quote or single quote back into the bare character
     * and maps a bare backslash to the empty string.</p>
     */
    // TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)?
    public static final CharSequenceTranslator UNESCAPE_JAVA =
            new AggregateTranslator(
                    new OctalUnescaper(),     // .between('\1', '\377'),
                    new UnicodeUnescaper(),
                    new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()),
                    new LookupTranslator(
                            new String[][] {
                                    {"\\\\", "\\"},
                                    {"\\\"", "\""},
                                    {"\\'", "'"},
                                    // a bare backslash maps to the empty string
                                    {"\\", ""}
                            })
            );
344
    /**
     * Translator object for unescaping escaped EcmaScript.
     *
     * <p>While {@link #unescapeEcmaScript(String)} is the expected method of use, this
     * object allows the EcmaScript unescaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>This is the same instance as {@link #UNESCAPE_JAVA}.</p>
     */
    public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA;
353
    /**
     * Translator object for unescaping escaped Json.
     *
     * <p>While {@link #unescapeJson(String)} is the expected method of use, this
     * object allows the Json unescaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>This is the same instance as {@link #UNESCAPE_JAVA}.</p>
     */
    public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA;
362
    /**
     * Translator object for unescaping escaped HTML 3.0.
     *
     * <p>While {@link #unescapeHtml3(String)} is the expected method of use, this
     * object allows the HTML unescaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>It aggregates lookups built from {@code EntityArrays.BASIC_UNESCAPE()} and
     * {@code EntityArrays.ISO8859_1_UNESCAPE()}, followed by a
     * {@code NumericEntityUnescaper}.</p>
     */
    public static final CharSequenceTranslator UNESCAPE_HTML3 =
            new AggregateTranslator(
                    new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
                    new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
                    new NumericEntityUnescaper()
            );
376
    /**
     * Translator object for unescaping escaped HTML 4.0.
     *
     * <p>While {@link #unescapeHtml4(String)} is the expected method of use, this
     * object allows the HTML unescaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>It aggregates lookups built from {@code EntityArrays.BASIC_UNESCAPE()},
     * {@code EntityArrays.ISO8859_1_UNESCAPE()} and
     * {@code EntityArrays.HTML40_EXTENDED_UNESCAPE()}, followed by a
     * {@code NumericEntityUnescaper}.</p>
     */
    public static final CharSequenceTranslator UNESCAPE_HTML4 =
            new AggregateTranslator(
                    new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
                    new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
                    new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()),
                    new NumericEntityUnescaper()
            );
391
    /**
     * Translator object for unescaping escaped XML.
     *
     * <p>While {@link #unescapeXml(String)} is the expected method of use, this
     * object allows the XML unescaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>It aggregates lookups built from {@code EntityArrays.BASIC_UNESCAPE()} and
     * {@code EntityArrays.APOS_UNESCAPE()}, followed by a
     * {@code NumericEntityUnescaper}.</p>
     */
    public static final CharSequenceTranslator UNESCAPE_XML =
            new AggregateTranslator(
                    new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
                    new LookupTranslator(EntityArrays.APOS_UNESCAPE()),
                    new NumericEntityUnescaper()
            );
405
    /**
     * Translator object for unescaping escaped Comma Separated Value entries.
     *
     * <p>While {@link #unescapeCsv(String)} is the expected method of use, this
     * object allows the CSV unescaping functionality to be used
     * as the foundation for a custom translator.</p>
     *
     * <p>The instance is a {@link CsvUnescaper}, which handles the whole input in a
     * single call rather than character by character.</p>
     */
    public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper();
414
415    static class CsvUnescaper extends CharSequenceTranslator {
416
417        private static final char CSV_DELIMITER = ',';
418        private static final char CSV_QUOTE = '"';
419        private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
420        private static final char[] CSV_SEARCH_CHARS =
421                new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};
422
423        @Override
424        public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
425
426            if(index != 0) {
427                throw new IllegalStateException("CsvUnescaper should never reach the [1] index");
428            }
429
430            if ( input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE ) {
431                out.write(input.toString());
432                return Character.codePointCount(input, 0, input.length());
433            }
434
435            // strip quotes
436            final String quoteless = input.subSequence(1, input.length() - 1).toString();
437
438            if ( StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS) ) {
439                // deal with escaped quotes; ie) ""
440                out.write(StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR));
441            } else {
442                out.write(input.toString());
443            }
444            return Character.codePointCount(input, 0, input.length());
445        }
446    }
447
    /**
     * Translator object for unescaping backslash escaped entries.
     *
     * <p>The instance is an {@link XsiUnescaper}, which handles the whole input in a
     * single call rather than character by character.</p>
     */
    public static final CharSequenceTranslator UNESCAPE_XSI = new XsiUnescaper();
449
450    /**
451     * Translator object for unescaping backslash escaped entries.
452     */
453    static class XsiUnescaper extends CharSequenceTranslator {
454
455        private static final char BACKSLASH = '\\';
456
457        @Override
458        public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
459
460            if(index != 0) {
461                throw new IllegalStateException("XsiUnescaper should never reach the [1] index");
462            }
463
464            String s = input.toString();
465
466            int segmentStart = 0;
467            int searchOffset = 0;
468            while (true) {
469                int pos = s.indexOf(BACKSLASH, searchOffset);
470                if (pos == -1) {
471                    if (segmentStart < s.length()) {
472                        out.write(s.substring(segmentStart));
473                    }
474                    break;
475                }
476                if (pos > segmentStart) {
477                    out.write(s.substring(segmentStart, pos));
478                }
479                segmentStart = pos + 1;
480                searchOffset = pos + 2;
481            }
482
483            return Character.codePointCount(input, 0, input.length());
484        }
485    }
486
487    /* Helper functions */
488
489    /**
490     * <p>{@code StringEscapeUtils} instances should NOT be constructed in
491     * standard programming.</p>
492     *
493     * <p>Instead, the class should be used as:</p>
494     * <pre>StringEscapeUtils.escapeJava("foo");</pre>
495     *
496     * <p>This constructor is public to permit tools that require a JavaBean
497     * instance to operate.</p>
498     */
499    public StringEscapeUtils() {
500        super();
501    }
502
503    /**
504     * <p>Convenience wrapper for {@link java.lang.StringBuilder} providing escape methods.</p>
505     *
506     * <p>Example:</p>
507     * <pre>
508     * new Builder(ESCAPE_HTML4)
509     *      .append("&lt;p&gt;")
510     *      .escape("This is paragraph 1 and special chars like &amp; get escaped.")
511     *      .append("&lt;/p&gt;&lt;p&gt;")
512     *      .escape("This is paragraph 2 &amp; more...")
513     *      .append("&lt;/p&gt;")
514     *      .toString()
515     * </pre>
516     *
517     */
518    public static class Builder {
519
520        private final StringBuilder sb;
521        private final CharSequenceTranslator translator;
522
523        private Builder(final CharSequenceTranslator translator) {
524            this.sb = new StringBuilder();
525            this.translator = translator;
526        }
527
528        /**
529         * <p>Escape {@code input} according to the given {@link CharSequenceTranslator}.</p>
530         *
531         * @param input the String to escape
532         * @return {@code this}, to enable chaining
533         */
534        public Builder escape(final String input) {
535            sb.append(translator.translate(input));
536            return this;
537        }
538
539        /**
540         * Literal append, no escaping being done.
541         *
542         * @param input the String to append
543         * @return {@code this}, to enable chaining
544         */
545        public Builder append(final String input) {
546            sb.append(input);
547            return this;
548        }
549
550        /**
551         * <p>Return the escaped string.</p>
552         *
553         * @return the escaped string
554         */
555        @Override
556        public String toString() {
557            return sb.toString();
558        }
559    }
560
    /**
     * Creates a new {@link Builder} whose {@link Builder#escape(String)} method
     * uses the given translator.
     *
     * @param translator the text translator
     * @return a new {@link Builder}
     */
    public static StringEscapeUtils.Builder builder(final CharSequenceTranslator translator) {
        return new Builder(translator);
    }
569
570    // Java and JavaScript
571    //--------------------------------------------------------------------------
    /**
     * <p>Escapes the characters in a {@code String} using Java String rules.</p>
     *
     * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
     *
     * <p>So a tab becomes the characters {@code '\\'} and
     * {@code 't'}.</p>
     *
     * <p>The only difference between Java strings and JavaScript strings
     * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p>
     *
     * <p>Example:</p>
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn't say, \"Stop!\"
     * </pre>
     *
     * <p>The translation is performed by {@link #ESCAPE_JAVA}.</p>
     *
     * @param input  String to escape values in, may be null
     * @return String with escaped values, {@code null} if null string input
     */
    public static final String escapeJava(final String input) {
        return ESCAPE_JAVA.translate(input);
    }
595
    /**
     * <p>Escapes the characters in a {@code String} using EcmaScript String rules.</p>
     * <p>Escapes any values it finds into their EcmaScript String form.
     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
     *
     * <p>So a tab becomes the characters {@code '\\'} and
     * {@code 't'}.</p>
     *
     * <p>The only difference between Java strings and EcmaScript strings
     * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p>
     *
     * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects. </p>
     *
     * <p>Example:</p>
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn\'t say, \"Stop!\"
     * </pre>
     *
     * <p><b>Security Note.</b> We only provide backslash escaping in this method. For example, {@code '\"'} has the
     * output {@code '\\\"'} which could result in potential issues in the case where the string being escaped is
     * being used in an HTML tag like {@code <select onmouseover="..." />}. If you wish to have more rigorous string
     * escaping, you may consider the
     * <a href="https://www.owasp.org/index.php/Category:OWASP_Enterprise_Security_API_JAVA">ESAPI Libraries</a>.
     * Further, you can view the
     * <a href="https://github.com/esapi">ESAPI GitHub Org</a>.</p>
     *
     * <p>The translation is performed by {@link #ESCAPE_ECMASCRIPT}.</p>
     *
     * @param input  String to escape values in, may be null
     * @return String with escaped values, {@code null} if null string input
     */
    public static final String escapeEcmaScript(final String input) {
        return ESCAPE_ECMASCRIPT.translate(input);
    }
629
    /**
     * <p>Escapes the characters in a {@code String} using Json String rules.</p>
     * <p>Escapes any values it finds into their Json String form.
     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
     *
     * <p>So a tab becomes the characters {@code '\\'} and
     * {@code 't'}.</p>
     *
     * <p>The only difference between Java strings and Json strings
     * is that in Json, forward-slash (/) is escaped.</p>
     *
     * <p>See <a href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a> for further details. </p>
     *
     * <p>Example:</p>
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn't say, \"Stop!\"
     * </pre>
     *
     * <p>The translation is performed by {@link #ESCAPE_JSON}.</p>
     *
     * @param input  String to escape values in, may be null
     * @return String with escaped values, {@code null} if null string input
     */
    public static final String escapeJson(final String input) {
        return ESCAPE_JSON.translate(input);
    }
655
    /**
     * <p>Unescapes any Java literals found in the {@code String}.
     * For example, it will turn a sequence of {@code '\'} and
     * {@code 'n'} into a newline character, unless the {@code '\'}
     * is preceded by another {@code '\'}.</p>
     *
     * <p>The translation is performed by {@link #UNESCAPE_JAVA}.</p>
     *
     * @param input  the {@code String} to unescape, may be null
     * @return a new unescaped {@code String}, {@code null} if null string input
     */
    public static final String unescapeJava(final String input) {
        return UNESCAPE_JAVA.translate(input);
    }
668
    /**
     * <p>Unescapes any EcmaScript literals found in the {@code String}.</p>
     *
     * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
     * into a newline character, unless the {@code '\'} is preceded by another
     * {@code '\'}.</p>
     *
     * <p>The translation is performed by {@link #UNESCAPE_ECMASCRIPT}, which is the
     * same translator instance as {@link #UNESCAPE_JAVA}.</p>
     *
     * @see #unescapeJava(String)
     * @param input  the {@code String} to unescape, may be null
     * @return A new unescaped {@code String}, {@code null} if null string input
     */
    public static final String unescapeEcmaScript(final String input) {
        return UNESCAPE_ECMASCRIPT.translate(input);
    }
683
    /**
     * <p>Unescapes any Json literals found in the {@code String}.</p>
     *
     * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
     * into a newline character, unless the {@code '\'} is preceded by another
     * {@code '\'}.</p>
     *
     * <p>The translation is performed by {@link #UNESCAPE_JSON}, which is the
     * same translator instance as {@link #UNESCAPE_JAVA}.</p>
     *
     * @see #unescapeJava(String)
     * @param input  the {@code String} to unescape, may be null
     * @return A new unescaped {@code String}, {@code null} if null string input
     */
    public static final String unescapeJson(final String input) {
        return UNESCAPE_JSON.translate(input);
    }
698
699    // HTML and XML
700    //--------------------------------------------------------------------------
    /**
     * <p>Escapes the characters in a {@code String} using HTML entities.</p>
     *
     * <p>
     * For example:
     * </p>
     * <p><code>"bread" &amp; "butter"</code></p>
     * becomes:
     * <p>
     * <code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code>.
     * </p>
     *
     * <p>Supports all known HTML 4.0 entities, including funky accents.
     * Note that the commonly used apostrophe escape character (&amp;apos;)
     * is not a legal entity and so is not supported.</p>
     *
     * <p>The translation is performed by {@link #ESCAPE_HTML4}.</p>
     *
     * @param input  the {@code String} to escape, may be null
     * @return a new escaped {@code String}, {@code null} if null string input
     *
     * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
     * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
     * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
     * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
     * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
     */
    public static final String escapeHtml4(final String input) {
        return ESCAPE_HTML4.translate(input);
    }
729
730    /**
731     * <p>Escapes the characters in a {@code String} using HTML entities.
732     * But escapes them only once. i.e. does not escape already escaped characters.</p>
733     *
734     * <p>
735     * For example:
736     * </p>
737     * <p><code>"bread" &amp; "butter"</code></p>
738     * becomes:
739     * <p>
740     * <code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code>.
741     * </p>
742     *
743     * <p>
744     * But:
745     * </p>
746     * <p><code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code></p>
747     * remains unaffected.
748     *
749     * <p>Supports all known HTML 4.0 entities, including funky accents.
750     * Note that the commonly used apostrophe escape character (&amp;apos;)
751     * is not a legal entity and so is not supported). </p>
752     *
753     * @param input  the {@code String} to escape, may be null
754     * @return a new escaped {@code String}, {@code null} if null string input
755     *
756     * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
757     * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
758     * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
759     * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
760     * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
761     */
762    public static final String escapeHtml4Once(final String input) {
763        return ESCAPE_HTML4_ONCE.translate(input);
764    }
765
766
767    /**
768     * <p>Escapes the characters in a {@code String} using HTML entities.</p>
769     * <p>Supports only the HTML 3.0 entities. </p>
770     *
771     * @param input  the {@code String} to escape, may be null
772     * @return a new escaped {@code String}, {@code null} if null string input
773     */
774    public static final String escapeHtml3(final String input) {
775        return ESCAPE_HTML3.translate(input);
776    }
777
778    /**
779     * <p>Escapes the characters in a {@code String} using HTML entities.
780     * But escapes them only once. i.e. does not escape already escaped characters.</p>
781     * <p>Supports only the HTML 3.0 entities. </p>
782     *
783     * @param input  the {@code String} to escape, may be null
784     * @return a new escaped {@code String}, {@code null} if null string input
785     */
786    public static final String escapeHtml3Once(final String input) {
787        return ESCAPE_HTML3_ONCE.translate(input);
788    }
789
790    //-----------------------------------------------------------------------
791    /**
792     * <p>Unescapes a string containing entity escapes to a string
793     * containing the actual Unicode characters corresponding to the
794     * escapes. Supports HTML 4.0 entities.</p>
795     *
796     * <p>For example, the string {@code "&lt;Fran&ccedil;ais&gt;"}
797     * will become {@code "<Fran�ais>"}</p>
798     *
799     * <p>If an entity is unrecognized, it is left alone, and inserted
800     * verbatim into the result string. e.g. {@code "&gt;&zzzz;x"} will
801     * become {@code ">&zzzz;x"}.</p>
802     *
803     * @param input  the {@code String} to unescape, may be null
804     * @return a new unescaped {@code String}, {@code null} if null string input
805     */
806    public static final String unescapeHtml4(final String input) {
807        return UNESCAPE_HTML4.translate(input);
808    }
809
810    /**
811     * <p>Unescapes a string containing entity escapes to a string
812     * containing the actual Unicode characters corresponding to the
813     * escapes. Supports only HTML 3.0 entities.</p>
814     *
815     * @param input  the {@code String} to unescape, may be null
816     * @return a new unescaped {@code String}, {@code null} if null string input
817     */
818    public static final String unescapeHtml3(final String input) {
819        return UNESCAPE_HTML3.translate(input);
820    }
821
822    /**
823     * <p>Escapes the characters in a {@code String} using XML entities.</p>
824     *
825     * <p>For example: {@code "bread" & "butter"} =&gt;
826     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
827     * </p>
828     *
829     * <p>Note that XML 1.0 is a text-only format: it cannot represent control
830     * characters or unpaired Unicode surrogate codepoints, even after escaping.
831     * {@code escapeXml10} will remove characters that do not fit in the
832     * following ranges:</p>
833     *
834     * <p>{@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
835     *
836     * <p>Though not strictly necessary, {@code escapeXml10} will escape
837     * characters in the following ranges:</p>
838     *
839     * <p>{@code [#x7F-#x84] | [#x86-#x9F]}</p>
840     *
841     * <p>The returned string can be inserted into a valid XML 1.0 or XML 1.1
842     * document. If you want to allow more non-text characters in an XML 1.1
843     * document, use {@link #escapeXml11(String)}.</p>
844     *
845     * @param input  the {@code String} to escape, may be null
846     * @return a new escaped {@code String}, {@code null} if null string input
847     * @see #unescapeXml(java.lang.String)
848     */
849    public static String escapeXml10(final String input) {
850        return ESCAPE_XML10.translate(input);
851    }
852
853    /**
854     * <p>Escapes the characters in a {@code String} using XML entities.</p>
855     *
856     * <p>For example: {@code "bread" & "butter"} =&gt;
857     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
858     * </p>
859     *
860     * <p>XML 1.1 can represent certain control characters, but it cannot represent
861     * the null byte or unpaired Unicode surrogate codepoints, even after escaping.
862     * {@code escapeXml11} will remove characters that do not fit in the following
863     * ranges:</p>
864     *
865     * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
866     *
867     * <p>{@code escapeXml11} will escape characters in the following ranges:</p>
868     *
869     * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p>
870     *
871     * <p>The returned string can be inserted into a valid XML 1.1 document. Do not
872     * use it for XML 1.0 documents.</p>
873     *
874     * @param input  the {@code String} to escape, may be null
875     * @return a new escaped {@code String}, {@code null} if null string input
876     * @see #unescapeXml(java.lang.String)
877     */
878    public static String escapeXml11(final String input) {
879        return ESCAPE_XML11.translate(input);
880    }
881
882    //-----------------------------------------------------------------------
883    /**
884     * <p>Unescapes a string containing XML entity escapes to a string
885     * containing the actual Unicode characters corresponding to the
886     * escapes.</p>
887     *
888     * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
889     * Does not support DTDs or external entities.</p>
890     *
891     * <p>Note that numerical \\u Unicode codes are unescaped to their respective 
892     *    Unicode characters. This may change in future releases. </p>
893     *
894     * @param input  the {@code String} to unescape, may be null
895     * @return a new unescaped {@code String}, {@code null} if null string input
896     * @see #escapeXml10(String)
897     * @see #escapeXml11(String)
898     */
899    public static final String unescapeXml(final String input) {
900        return UNESCAPE_XML.translate(input);
901    }
902
903    //-----------------------------------------------------------------------
904
905    /**
906     * <p>Returns a {@code String} value for a CSV column enclosed in double quotes,
907     * if required.</p>
908     *
909     * <p>If the value contains a comma, newline or double quote, then the
910     *    String value is returned enclosed in double quotes.</p>
911     *
912     * <p>Any double quote characters in the value are escaped with another double quote.</p>
913     *
914     * <p>If the value does not contain a comma, newline or double quote, then the
915     *    String value is returned unchanged.</p>
916     *
917     * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
918     * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
919     *
920     * @param input the input CSV column String, may be null
921     * @return the input String, enclosed in double quotes if the value contains a comma,
922     * newline or double quote, {@code null} if null string input
923     */
924    public static final String escapeCsv(final String input) {
925        return ESCAPE_CSV.translate(input);
926    }
927
928    /**
929     * <p>Returns a {@code String} value for an unescaped CSV column. </p>
930     *
931     * <p>If the value is enclosed in double quotes, and contains a comma, newline 
932     *    or double quote, then quotes are removed. 
933     * </p>
934     *
935     * <p>Any double quote escaped characters (a pair of double quotes) are unescaped 
936     *    to just one double quote. </p>
937     *
938     * <p>If the value is not enclosed in double quotes, or is and does not contain a 
939     *    comma, newline or double quote, then the String value is returned unchanged.</p>
940     *
941     * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
942     * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
943     *
944     * @param input the input CSV column String, may be null
945     * @return the input String, with enclosing double quotes removed and embedded double 
946     * quotes unescaped, {@code null} if null string input
947     */
948    public static final String unescapeCsv(final String input) {
949        return UNESCAPE_CSV.translate(input);
950    }
951
952    /**
953     * <p>Escapes the characters in a {@code String} using XSI rules.</p>
954     *
955     * <p><b>Beware!</b> In most cases you don't want to escape shell commands but use multi-argument
956     * methods provided by {@link java.lang.ProcessBuilder} or {@link java.lang.Runtime#exec(String[])}
957     * instead.</p>
958     *
959     * <p>Example:</p>
960     * <pre>
961     * input string: He didn't say, "Stop!"
962     * output string: He\ didn\'t\ say,\ \"Stop!\"
963     * </pre>
964     *
965     * @see <a href="http://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a>
966     * @param input  String to escape values in, may be null
967     * @return String with escaped values, {@code null} if null string input
968     */
969    public static final String escapeXSI(final String input) {
970        return ESCAPE_XSI.translate(input);
971    }
972
973    /**
974     * <p>Unescapes the characters in a {@code String} using XSI rules.</p>
975     *
976     * @see StringEscapeUtils#escapeXSI(String)
977     * @param input  the {@code String} to unescape, may be null
978     * @return a new unescaped {@code String}, {@code null} if null string input
979     */
980    public static final String unescapeXSI(final String input) {
981        return UNESCAPE_XSI.translate(input);
982    }
983
984}