/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
017package org.apache.commons.lang3;
018
019import java.io.IOException;
020import java.io.Writer;
021
022import org.apache.commons.lang3.text.translate.AggregateTranslator;
023import org.apache.commons.lang3.text.translate.CharSequenceTranslator;
024import org.apache.commons.lang3.text.translate.EntityArrays;
025import org.apache.commons.lang3.text.translate.JavaUnicodeEscaper;
026import org.apache.commons.lang3.text.translate.LookupTranslator;
027import org.apache.commons.lang3.text.translate.NumericEntityEscaper;
028import org.apache.commons.lang3.text.translate.NumericEntityUnescaper;
029import org.apache.commons.lang3.text.translate.OctalUnescaper;
030import org.apache.commons.lang3.text.translate.UnicodeUnescaper;
031import org.apache.commons.lang3.text.translate.UnicodeUnpairedSurrogateRemover;
032
033/**
034 * <p>Escapes and unescapes {@code String}s for
035 * Java, Java Script, HTML and XML.</p>
036 *
037 * <p>#ThreadSafe#</p>
038 * @since 2.0
039 */
040public class StringEscapeUtils {
041
042    /* ESCAPE TRANSLATORS */
043
044    /**
045     * Translator object for escaping Java. 
046     * 
047     * While {@link #escapeJava(String)} is the expected method of use, this 
048     * object allows the Java escaping functionality to be used 
049     * as the foundation for a custom translator. 
050     *
051     * @since 3.0
052     */
053    public static final CharSequenceTranslator ESCAPE_JAVA = 
054          new LookupTranslator(
055            new String[][] { 
056              {"\"", "\\\""},
057              {"\\", "\\\\"},
058          }).with(
059            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE())
060          ).with(
061            JavaUnicodeEscaper.outsideOf(32, 0x7f) 
062        );
063
064    /**
065     * Translator object for escaping EcmaScript/JavaScript. 
066     * 
067     * While {@link #escapeEcmaScript(String)} is the expected method of use, this 
068     * object allows the EcmaScript escaping functionality to be used 
069     * as the foundation for a custom translator. 
070     *
071     * @since 3.0
072     */
073    public static final CharSequenceTranslator ESCAPE_ECMASCRIPT = 
074        new AggregateTranslator(
075            new LookupTranslator(
076                      new String[][] { 
077                            {"'", "\\'"},
078                            {"\"", "\\\""},
079                            {"\\", "\\\\"},
080                            {"/", "\\/"}
081                      }),
082            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
083            JavaUnicodeEscaper.outsideOf(32, 0x7f) 
084        );
085
086    /**
087     * Translator object for escaping Json.
088     *
089     * While {@link #escapeJson(String)} is the expected method of use, this
090     * object allows the Json escaping functionality to be used
091     * as the foundation for a custom translator.
092     *
093     * @since 3.2
094     */
095    public static final CharSequenceTranslator ESCAPE_JSON =
096        new AggregateTranslator(
097            new LookupTranslator(
098                      new String[][] {
099                            {"\"", "\\\""},
100                            {"\\", "\\\\"},
101                            {"/", "\\/"}
102                      }),
103            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
104            JavaUnicodeEscaper.outsideOf(32, 0x7f)
105        );
106
107    /**
108     * Translator object for escaping XML.
109     * 
110     * While {@link #escapeXml(String)} is the expected method of use, this 
111     * object allows the XML escaping functionality to be used 
112     * as the foundation for a custom translator. 
113     *
114     * @since 3.0
115     * @deprecated use {@link #ESCAPE_XML10} or {@link #ESCAPE_XML11} instead.
116     */
117    @Deprecated
118    public static final CharSequenceTranslator ESCAPE_XML = 
119        new AggregateTranslator(
120            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
121            new LookupTranslator(EntityArrays.APOS_ESCAPE())
122        );
123    
124    /**
125     * Translator object for escaping XML 1.0.
126     * 
127     * While {@link #escapeXml10(String)} is the expected method of use, this
128     * object allows the XML escaping functionality to be used
129     * as the foundation for a custom translator.
130     *
131     * @since 3.3
132     */
133    public static final CharSequenceTranslator ESCAPE_XML10 =
134        new AggregateTranslator(
135            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
136            new LookupTranslator(EntityArrays.APOS_ESCAPE()),
137            new LookupTranslator(
138                    new String[][] {
139                            { "\u0000", StringUtils.EMPTY },
140                            { "\u0001", StringUtils.EMPTY },
141                            { "\u0002", StringUtils.EMPTY },
142                            { "\u0003", StringUtils.EMPTY },
143                            { "\u0004", StringUtils.EMPTY },
144                            { "\u0005", StringUtils.EMPTY },
145                            { "\u0006", StringUtils.EMPTY },
146                            { "\u0007", StringUtils.EMPTY },
147                            { "\u0008", StringUtils.EMPTY },
148                            { "\u000b", StringUtils.EMPTY },
149                            { "\u000c", StringUtils.EMPTY },
150                            { "\u000e", StringUtils.EMPTY },
151                            { "\u000f", StringUtils.EMPTY },
152                            { "\u0010", StringUtils.EMPTY },
153                            { "\u0011", StringUtils.EMPTY },
154                            { "\u0012", StringUtils.EMPTY },
155                            { "\u0013", StringUtils.EMPTY },
156                            { "\u0014", StringUtils.EMPTY },
157                            { "\u0015", StringUtils.EMPTY },
158                            { "\u0016", StringUtils.EMPTY },
159                            { "\u0017", StringUtils.EMPTY },
160                            { "\u0018", StringUtils.EMPTY },
161                            { "\u0019", StringUtils.EMPTY },
162                            { "\u001a", StringUtils.EMPTY },
163                            { "\u001b", StringUtils.EMPTY },
164                            { "\u001c", StringUtils.EMPTY },
165                            { "\u001d", StringUtils.EMPTY },
166                            { "\u001e", StringUtils.EMPTY },
167                            { "\u001f", StringUtils.EMPTY },
168                            { "\ufffe", StringUtils.EMPTY },
169                            { "\uffff", StringUtils.EMPTY }
170                    }),
171            NumericEntityEscaper.between(0x7f, 0x84),
172            NumericEntityEscaper.between(0x86, 0x9f),
173            new UnicodeUnpairedSurrogateRemover()
174        );
175    
176    /**
177     * Translator object for escaping XML 1.1.
178     * 
179     * While {@link #escapeXml11(String)} is the expected method of use, this
180     * object allows the XML escaping functionality to be used
181     * as the foundation for a custom translator.
182     *
183     * @since 3.3
184     */
185    public static final CharSequenceTranslator ESCAPE_XML11 =
186        new AggregateTranslator(
187            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
188            new LookupTranslator(EntityArrays.APOS_ESCAPE()),
189            new LookupTranslator(
190                    new String[][] {
191                            { "\u0000", StringUtils.EMPTY },
192                            { "\u000b", "&#11;" },
193                            { "\u000c", "&#12;" },
194                            { "\ufffe", StringUtils.EMPTY },
195                            { "\uffff", StringUtils.EMPTY }
196                    }),
197            NumericEntityEscaper.between(0x1, 0x8),
198            NumericEntityEscaper.between(0xe, 0x1f),
199            NumericEntityEscaper.between(0x7f, 0x84),
200            NumericEntityEscaper.between(0x86, 0x9f),
201            new UnicodeUnpairedSurrogateRemover()
202        );
203
204    /**
205     * Translator object for escaping HTML version 3.0.
206     * 
207     * While {@link #escapeHtml3(String)} is the expected method of use, this 
208     * object allows the HTML escaping functionality to be used 
209     * as the foundation for a custom translator. 
210     *
211     * @since 3.0
212     */
213    public static final CharSequenceTranslator ESCAPE_HTML3 = 
214        new AggregateTranslator(
215            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
216            new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE())
217        );
218
219    /**
220     * Translator object for escaping HTML version 4.0.
221     * 
222     * While {@link #escapeHtml4(String)} is the expected method of use, this 
223     * object allows the HTML escaping functionality to be used 
224     * as the foundation for a custom translator. 
225     *
226     * @since 3.0
227     */
228    public static final CharSequenceTranslator ESCAPE_HTML4 = 
229        new AggregateTranslator(
230            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
231            new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()),
232            new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE())
233        );
234
235    /**
236     * Translator object for escaping individual Comma Separated Values. 
237     * 
238     * While {@link #escapeCsv(String)} is the expected method of use, this 
239     * object allows the CSV escaping functionality to be used 
240     * as the foundation for a custom translator. 
241     *
242     * @since 3.0
243     */
244    public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper();
245
246    // TODO: Create a parent class - 'SinglePassTranslator' ?
247    //       It would handle the index checking + length returning, 
248    //       and could also have an optimization check method.
249    static class CsvEscaper extends CharSequenceTranslator {
250
251        private static final char CSV_DELIMITER = ',';
252        private static final char CSV_QUOTE = '"';
253        private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
254        private static final char[] CSV_SEARCH_CHARS = 
255            new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};
256
257        @Override
258        public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
259
260            if(index != 0) {
261                throw new IllegalStateException("CsvEscaper should never reach the [1] index");
262            }
263
264            if (StringUtils.containsNone(input.toString(), CSV_SEARCH_CHARS)) {
265                out.write(input.toString());
266            } else {
267                out.write(CSV_QUOTE);
268                out.write(StringUtils.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR));
269                out.write(CSV_QUOTE);
270            }
271            return Character.codePointCount(input, 0, input.length());
272        }
273    }
274
275    /* UNESCAPE TRANSLATORS */
276
277    /**
278     * Translator object for unescaping escaped Java. 
279     * 
280     * While {@link #unescapeJava(String)} is the expected method of use, this 
281     * object allows the Java unescaping functionality to be used 
282     * as the foundation for a custom translator. 
283     *
284     * @since 3.0
285     */
286    // TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)?
287    public static final CharSequenceTranslator UNESCAPE_JAVA = 
288        new AggregateTranslator(
289            new OctalUnescaper(),     // .between('\1', '\377'),
290            new UnicodeUnescaper(),
291            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()),
292            new LookupTranslator(
293                      new String[][] { 
294                            {"\\\\", "\\"},
295                            {"\\\"", "\""},
296                            {"\\'", "'"},
297                            {"\\", ""}
298                      })
299        );
300
301    /**
302     * Translator object for unescaping escaped EcmaScript. 
303     * 
304     * While {@link #unescapeEcmaScript(String)} is the expected method of use, this 
305     * object allows the EcmaScript unescaping functionality to be used 
306     * as the foundation for a custom translator. 
307     *
308     * @since 3.0
309     */
310    public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA;
311
312    /**
313     * Translator object for unescaping escaped Json.
314     *
315     * While {@link #unescapeJson(String)} is the expected method of use, this
316     * object allows the Json unescaping functionality to be used
317     * as the foundation for a custom translator.
318     *
319     * @since 3.2
320     */
321    public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA;
322
323    /**
324     * Translator object for unescaping escaped HTML 3.0. 
325     * 
326     * While {@link #unescapeHtml3(String)} is the expected method of use, this 
327     * object allows the HTML unescaping functionality to be used 
328     * as the foundation for a custom translator. 
329     *
330     * @since 3.0
331     */
332    public static final CharSequenceTranslator UNESCAPE_HTML3 = 
333        new AggregateTranslator(
334            new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
335            new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
336            new NumericEntityUnescaper()
337        );
338
339    /**
340     * Translator object for unescaping escaped HTML 4.0. 
341     * 
342     * While {@link #unescapeHtml4(String)} is the expected method of use, this 
343     * object allows the HTML unescaping functionality to be used 
344     * as the foundation for a custom translator. 
345     *
346     * @since 3.0
347     */
348    public static final CharSequenceTranslator UNESCAPE_HTML4 = 
349        new AggregateTranslator(
350            new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
351            new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
352            new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()),
353            new NumericEntityUnescaper()
354        );
355
356    /**
357     * Translator object for unescaping escaped XML.
358     * 
359     * While {@link #unescapeXml(String)} is the expected method of use, this 
360     * object allows the XML unescaping functionality to be used 
361     * as the foundation for a custom translator. 
362     *
363     * @since 3.0
364     */
365    public static final CharSequenceTranslator UNESCAPE_XML = 
366        new AggregateTranslator(
367            new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
368            new LookupTranslator(EntityArrays.APOS_UNESCAPE()),
369            new NumericEntityUnescaper()
370        );
371
372    /**
373     * Translator object for unescaping escaped Comma Separated Value entries.
374     * 
375     * While {@link #unescapeCsv(String)} is the expected method of use, this 
376     * object allows the CSV unescaping functionality to be used 
377     * as the foundation for a custom translator. 
378     *
379     * @since 3.0
380     */
381    public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper();
382
383    static class CsvUnescaper extends CharSequenceTranslator {
384
385        private static final char CSV_DELIMITER = ',';
386        private static final char CSV_QUOTE = '"';
387        private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
388        private static final char[] CSV_SEARCH_CHARS = 
389            new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};
390
391        @Override
392        public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
393
394            if(index != 0) {
395                throw new IllegalStateException("CsvUnescaper should never reach the [1] index");
396            }
397
398            if ( input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE ) {
399                out.write(input.toString());
400                return Character.codePointCount(input, 0, input.length());
401            }
402
403            // strip quotes
404            final String quoteless = input.subSequence(1, input.length() - 1).toString();
405
406            if ( StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS) ) {
407                // deal with escaped quotes; ie) ""
408                out.write(StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR));
409            } else {
410                out.write(input.toString());
411            }
412            return Character.codePointCount(input, 0, input.length());
413        }
414    }
415
416    /* Helper functions */
417
418    /**
419     * <p>{@code StringEscapeUtils} instances should NOT be constructed in
420     * standard programming.</p>
421     *
422     * <p>Instead, the class should be used as:</p>
423     * <pre>StringEscapeUtils.escapeJava("foo");</pre>
424     *
425     * <p>This constructor is public to permit tools that require a JavaBean
426     * instance to operate.</p>
427     */
428    public StringEscapeUtils() {
429      super();
430    }
431
432    // Java and JavaScript
433    //--------------------------------------------------------------------------
434    /**
435     * <p>Escapes the characters in a {@code String} using Java String rules.</p>
436     *
437     * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
438     *
439     * <p>So a tab becomes the characters {@code '\\'} and
440     * {@code 't'}.</p>
441     *
442     * <p>The only difference between Java strings and JavaScript strings
443     * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p>
444     *
445     * <p>Example:</p>
446     * <pre>
447     * input string: He didn't say, "Stop!"
448     * output string: He didn't say, \"Stop!\"
449     * </pre>
450     *
451     * @param input  String to escape values in, may be null
452     * @return String with escaped values, {@code null} if null string input
453     */
454    public static final String escapeJava(final String input) {
455        return ESCAPE_JAVA.translate(input);
456    }
457
458    /**
459     * <p>Escapes the characters in a {@code String} using EcmaScript String rules.</p>
460     * <p>Escapes any values it finds into their EcmaScript String form.
461     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
462     *
463     * <p>So a tab becomes the characters {@code '\\'} and
464     * {@code 't'}.</p>
465     *
466     * <p>The only difference between Java strings and EcmaScript strings
467     * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p>
468     *
469     * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects. </p>
470     *
471     * <p>Example:</p>
472     * <pre>
473     * input string: He didn't say, "Stop!"
474     * output string: He didn\'t say, \"Stop!\"
475     * </pre>
476     *
477     * @param input  String to escape values in, may be null
478     * @return String with escaped values, {@code null} if null string input
479     *
480     * @since 3.0
481     */
482    public static final String escapeEcmaScript(final String input) {
483        return ESCAPE_ECMASCRIPT.translate(input);
484    }
485
486    /**
487     * <p>Escapes the characters in a {@code String} using Json String rules.</p>
488     * <p>Escapes any values it finds into their Json String form.
489     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
490     *
491     * <p>So a tab becomes the characters {@code '\\'} and
492     * {@code 't'}.</p>
493     *
494     * <p>The only difference between Java strings and Json strings
495     * is that in Json, forward-slash (/) is escaped.</p>
496     *
497     * <p>See http://www.ietf.org/rfc/rfc4627.txt for further details. </p>
498     *
499     * <p>Example:</p>
500     * <pre>
501     * input string: He didn't say, "Stop!"
502     * output string: He didn't say, \"Stop!\"
503     * </pre>
504     *
505     * @param input  String to escape values in, may be null
506     * @return String with escaped values, {@code null} if null string input
507     *
508     * @since 3.2
509     */
510    public static final String escapeJson(final String input) {
511        return ESCAPE_JSON.translate(input);
512    }
513
514    /**
515     * <p>Unescapes any Java literals found in the {@code String}.
516     * For example, it will turn a sequence of {@code '\'} and
517     * {@code 'n'} into a newline character, unless the {@code '\'}
518     * is preceded by another {@code '\'}.</p>
519     * 
520     * @param input  the {@code String} to unescape, may be null
521     * @return a new unescaped {@code String}, {@code null} if null string input
522     */
523    public static final String unescapeJava(final String input) {
524        return UNESCAPE_JAVA.translate(input);
525    }
526
527    /**
528     * <p>Unescapes any EcmaScript literals found in the {@code String}.</p>
529     *
530     * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
531     * into a newline character, unless the {@code '\'} is preceded by another
532     * {@code '\'}.</p>
533     *
534     * @see #unescapeJava(String)
535     * @param input  the {@code String} to unescape, may be null
536     * @return A new unescaped {@code String}, {@code null} if null string input
537     *
538     * @since 3.0
539     */
540    public static final String unescapeEcmaScript(final String input) {
541        return UNESCAPE_ECMASCRIPT.translate(input);
542    }
543
544    /**
545     * <p>Unescapes any Json literals found in the {@code String}.</p>
546     *
547     * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
548     * into a newline character, unless the {@code '\'} is preceded by another
549     * {@code '\'}.</p>
550     *
551     * @see #unescapeJava(String)
552     * @param input  the {@code String} to unescape, may be null
553     * @return A new unescaped {@code String}, {@code null} if null string input
554     *
555     * @since 3.2
556     */
557    public static final String unescapeJson(final String input) {
558        return UNESCAPE_JSON.translate(input);
559    }
560
561    // HTML and XML
562    //--------------------------------------------------------------------------
563    /**
564     * <p>Escapes the characters in a {@code String} using HTML entities.</p>
565     *
566     * <p>
567     * For example:
568     * </p> 
569     * <p><code>"bread" &amp; "butter"</code></p>
570     * becomes:
571     * <p>
572     * <code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code>.
573     * </p>
574     *
575     * <p>Supports all known HTML 4.0 entities, including funky accents.
576     * Note that the commonly used apostrophe escape character (&amp;apos;)
577     * is not a legal entity and so is not supported). </p>
578     *
579     * @param input  the {@code String} to escape, may be null
580     * @return a new escaped {@code String}, {@code null} if null string input
581     * 
582     * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
583     * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
584     * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
585     * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
586     * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
587     * 
588     * @since 3.0
589     */
590    public static final String escapeHtml4(final String input) {
591        return ESCAPE_HTML4.translate(input);
592    }
593
594    /**
595     * <p>Escapes the characters in a {@code String} using HTML entities.</p>
596     * <p>Supports only the HTML 3.0 entities. </p>
597     *
598     * @param input  the {@code String} to escape, may be null
599     * @return a new escaped {@code String}, {@code null} if null string input
600     * 
601     * @since 3.0
602     */
603    public static final String escapeHtml3(final String input) {
604        return ESCAPE_HTML3.translate(input);
605    }
606
607    //-----------------------------------------------------------------------
608    /**
609     * <p>Unescapes a string containing entity escapes to a string
610     * containing the actual Unicode characters corresponding to the
611     * escapes. Supports HTML 4.0 entities.</p>
612     *
613     * <p>For example, the string {@code "&lt;Fran&ccedil;ais&gt;"}
614     * will become {@code "<Français>"}</p>
615     *
616     * <p>If an entity is unrecognized, it is left alone, and inserted
617     * verbatim into the result string. e.g. {@code "&gt;&zzzz;x"} will
618     * become {@code ">&zzzz;x"}.</p>
619     *
620     * @param input  the {@code String} to unescape, may be null
621     * @return a new unescaped {@code String}, {@code null} if null string input
622     * 
623     * @since 3.0
624     */
625    public static final String unescapeHtml4(final String input) {
626        return UNESCAPE_HTML4.translate(input);
627    }
628
629    /**
630     * <p>Unescapes a string containing entity escapes to a string
631     * containing the actual Unicode characters corresponding to the
632     * escapes. Supports only HTML 3.0 entities.</p>
633     *
634     * @param input  the {@code String} to unescape, may be null
635     * @return a new unescaped {@code String}, {@code null} if null string input
636     * 
637     * @since 3.0
638     */
639    public static final String unescapeHtml3(final String input) {
640        return UNESCAPE_HTML3.translate(input);
641    }
642
643    //-----------------------------------------------------------------------
644    /**
645     * <p>Escapes the characters in a {@code String} using XML entities.</p>
646     *
647     * <p>For example: {@code "bread" & "butter"} =&gt;
648     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
649     * </p>
650     *
651     * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
652     * Does not support DTDs or external entities.</p>
653     *
654     * <p>Note that Unicode characters greater than 0x7f are as of 3.0, no longer 
655     *    escaped. If you still wish this functionality, you can achieve it 
656     *    via the following: 
657     * {@code StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE) );}</p>
658     *
659     * @param input  the {@code String} to escape, may be null
660     * @return a new escaped {@code String}, {@code null} if null string input
661     * @see #unescapeXml(java.lang.String)
662     * @deprecated use {@link #escapeXml10(java.lang.String)} or {@link #escapeXml11(java.lang.String)} instead.
663     */
664    @Deprecated
665    public static final String escapeXml(final String input) {
666        return ESCAPE_XML.translate(input);
667    }
668
669    /**
670     * <p>Escapes the characters in a {@code String} using XML entities.</p>
671     *
672     * <p>For example: {@code "bread" & "butter"} =&gt;
673     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
674     * </p>
675     *
676     * <p>Note that XML 1.0 is a text-only format: it cannot represent control
677     * characters or unpaired Unicode surrogate codepoints, even after escaping.
678     * {@code escapeXml10} will remove characters that do not fit in the
679     * following ranges:</p>
680     * 
681     * <p>{@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
682     * 
683     * <p>Though not strictly necessary, {@code escapeXml10} will escape
684     * characters in the following ranges:</p>
685     * 
686     * <p>{@code [#x7F-#x84] | [#x86-#x9F]}</p>
687     * 
688     * <p>The returned string can be inserted into a valid XML 1.0 or XML 1.1
689     * document. If you want to allow more non-text characters in an XML 1.1
690     * document, use {@link #escapeXml11(String)}.</p>
691     *
692     * @param input  the {@code String} to escape, may be null
693     * @return a new escaped {@code String}, {@code null} if null string input
694     * @see #unescapeXml(java.lang.String)
695     * @since 3.3
696     */
697    public static String escapeXml10(final String input) {
698        return ESCAPE_XML10.translate(input);
699    }
700    
701    /**
702     * <p>Escapes the characters in a {@code String} using XML entities.</p>
703     *
704     * <p>For example: {@code "bread" & "butter"} =&gt;
705     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
706     * </p>
707     *
708     * <p>XML 1.1 can represent certain control characters, but it cannot represent
709     * the null byte or unpaired Unicode surrogate codepoints, even after escaping.
710     * {@code escapeXml11} will remove characters that do not fit in the following
711     * ranges:</p>
712     * 
713     * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
714     * 
715     * <p>{@code escapeXml11} will escape characters in the following ranges:</p>
716     * 
717     * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p>
718     * 
719     * <p>The returned string can be inserted into a valid XML 1.1 document. Do not
720     * use it for XML 1.0 documents.</p>
721     *
722     * @param input  the {@code String} to escape, may be null
723     * @return a new escaped {@code String}, {@code null} if null string input
724     * @see #unescapeXml(java.lang.String)
725     * @since 3.3
726     */
727    public static String escapeXml11(final String input) {
728        return ESCAPE_XML11.translate(input);
729    }
730
731    //-----------------------------------------------------------------------
732    /**
733     * <p>Unescapes a string containing XML entity escapes to a string
734     * containing the actual Unicode characters corresponding to the
735     * escapes.</p>
736     *
737     * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
738     * Does not support DTDs or external entities.</p>
739     *
740     * <p>Note that numerical \\u Unicode codes are unescaped to their respective 
741     *    Unicode characters. This may change in future releases. </p>
742     *
743     * @param input  the {@code String} to unescape, may be null
744     * @return a new unescaped {@code String}, {@code null} if null string input
745     * @see #escapeXml(String)
746     * @see #escapeXml10(String)
747     * @see #escapeXml11(String)
748     */
749    public static final String unescapeXml(final String input) {
750        return UNESCAPE_XML.translate(input);
751    }
752
753    //-----------------------------------------------------------------------
754
755    /**
756     * <p>Returns a {@code String} value for a CSV column enclosed in double quotes,
757     * if required.</p>
758     *
759     * <p>If the value contains a comma, newline or double quote, then the
760     *    String value is returned enclosed in double quotes.</p>
761     *
762     * <p>Any double quote characters in the value are escaped with another double quote.</p>
763     *
764     * <p>If the value does not contain a comma, newline or double quote, then the
765     *    String value is returned unchanged.</p>
766     *
767     * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
768     * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
769     *
770     * @param input the input CSV column String, may be null
771     * @return the input String, enclosed in double quotes if the value contains a comma,
772     * newline or double quote, {@code null} if null string input
773     * @since 2.4
774     */
775    public static final String escapeCsv(final String input) {
776        return ESCAPE_CSV.translate(input);
777    }
778
779    /**
780     * <p>Returns a {@code String} value for an unescaped CSV column. </p>
781     *
782     * <p>If the value is enclosed in double quotes, and contains a comma, newline 
783     *    or double quote, then quotes are removed. 
784     * </p>
785     *
786     * <p>Any double quote escaped characters (a pair of double quotes) are unescaped 
787     *    to just one double quote. </p>
788     *
789     * <p>If the value is not enclosed in double quotes, or is and does not contain a 
790     *    comma, newline or double quote, then the String value is returned unchanged.</p>
791     *
792     * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
793     * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
794     *
795     * @param input the input CSV column String, may be null
796     * @return the input String, with enclosing double quotes removed and embedded double 
797     * quotes unescaped, {@code null} if null string input
798     * @since 2.4
799     */
800    public static final String unescapeCsv(final String input) {
801        return UNESCAPE_CSV.translate(input);
802    }
803
804}