001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 * 
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 * 
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.lang3;
018
019import java.io.IOException;
020import java.io.Writer;
021
022import org.apache.commons.lang3.text.translate.AggregateTranslator;
023import org.apache.commons.lang3.text.translate.CharSequenceTranslator;
024import org.apache.commons.lang3.text.translate.EntityArrays;
025import org.apache.commons.lang3.text.translate.JavaUnicodeEscaper;
026import org.apache.commons.lang3.text.translate.LookupTranslator;
027import org.apache.commons.lang3.text.translate.NumericEntityEscaper;
028import org.apache.commons.lang3.text.translate.NumericEntityUnescaper;
029import org.apache.commons.lang3.text.translate.OctalUnescaper;
030import org.apache.commons.lang3.text.translate.UnicodeUnescaper;
031import org.apache.commons.lang3.text.translate.UnicodeUnpairedSurrogateRemover;
032
033/**
034 * <p>Escapes and unescapes {@code String}s for
035 * Java, Java Script, HTML and XML.</p>
036 *
037 * <p>#ThreadSafe#</p>
038 * @since 2.0
039 * @version $Id: StringEscapeUtils.java 1630076 2014-10-08 11:49:54Z djones $
040 */
041public class StringEscapeUtils {
042
043    /* ESCAPE TRANSLATORS */
044
045    /**
046     * Translator object for escaping Java. 
047     * 
048     * While {@link #escapeJava(String)} is the expected method of use, this 
049     * object allows the Java escaping functionality to be used 
050     * as the foundation for a custom translator. 
051     *
052     * @since 3.0
053     */
054    public static final CharSequenceTranslator ESCAPE_JAVA = 
055          new LookupTranslator(
056            new String[][] { 
057              {"\"", "\\\""},
058              {"\\", "\\\\"},
059          }).with(
060            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE())
061          ).with(
062            JavaUnicodeEscaper.outsideOf(32, 0x7f) 
063        );
064
065    /**
066     * Translator object for escaping EcmaScript/JavaScript. 
067     * 
068     * While {@link #escapeEcmaScript(String)} is the expected method of use, this 
069     * object allows the EcmaScript escaping functionality to be used 
070     * as the foundation for a custom translator. 
071     *
072     * @since 3.0
073     */
074    public static final CharSequenceTranslator ESCAPE_ECMASCRIPT = 
075        new AggregateTranslator(
076            new LookupTranslator(
077                      new String[][] { 
078                            {"'", "\\'"},
079                            {"\"", "\\\""},
080                            {"\\", "\\\\"},
081                            {"/", "\\/"}
082                      }),
083            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
084            JavaUnicodeEscaper.outsideOf(32, 0x7f) 
085        );
086
087    /**
088     * Translator object for escaping Json.
089     *
090     * While {@link #escapeJson(String)} is the expected method of use, this
091     * object allows the Json escaping functionality to be used
092     * as the foundation for a custom translator.
093     *
094     * @since 3.2
095     */
096    public static final CharSequenceTranslator ESCAPE_JSON =
097        new AggregateTranslator(
098            new LookupTranslator(
099                      new String[][] {
100                            {"\"", "\\\""},
101                            {"\\", "\\\\"},
102                            {"/", "\\/"}
103                      }),
104            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
105            JavaUnicodeEscaper.outsideOf(32, 0x7f)
106        );
107
108    /**
109     * Translator object for escaping XML.
110     * 
111     * While {@link #escapeXml(String)} is the expected method of use, this 
112     * object allows the XML escaping functionality to be used 
113     * as the foundation for a custom translator. 
114     *
115     * @since 3.0
116     * @deprecated use {@link #ESCAPE_XML10} or {@link #ESCAPE_XML11} instead.
117     */
118    @Deprecated
119    public static final CharSequenceTranslator ESCAPE_XML = 
120        new AggregateTranslator(
121            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
122            new LookupTranslator(EntityArrays.APOS_ESCAPE())
123        );
124    
125    /**
126     * Translator object for escaping XML 1.0.
127     * 
128     * While {@link #escapeXml10(String)} is the expected method of use, this
129     * object allows the XML escaping functionality to be used
130     * as the foundation for a custom translator.
131     *
132     * @since 3.3
133     */
134    public static final CharSequenceTranslator ESCAPE_XML10 =
135        new AggregateTranslator(
136            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
137            new LookupTranslator(EntityArrays.APOS_ESCAPE()),
138            new LookupTranslator(
139                    new String[][] {
140                            { "\u0000", "" },
141                            { "\u0001", "" },
142                            { "\u0002", "" },
143                            { "\u0003", "" },
144                            { "\u0004", "" },
145                            { "\u0005", "" },
146                            { "\u0006", "" },
147                            { "\u0007", "" },
148                            { "\u0008", "" },
149                            { "\u000b", "" },
150                            { "\u000c", "" },
151                            { "\u000e", "" },
152                            { "\u000f", "" },
153                            { "\u0010", "" },
154                            { "\u0011", "" },
155                            { "\u0012", "" },
156                            { "\u0013", "" },
157                            { "\u0014", "" },
158                            { "\u0015", "" },
159                            { "\u0016", "" },
160                            { "\u0017", "" },
161                            { "\u0018", "" },
162                            { "\u0019", "" },
163                            { "\u001a", "" },
164                            { "\u001b", "" },
165                            { "\u001c", "" },
166                            { "\u001d", "" },
167                            { "\u001e", "" },
168                            { "\u001f", "" },
169                            { "\ufffe", "" },
170                            { "\uffff", "" }
171                    }),
172            NumericEntityEscaper.between(0x7f, 0x84),
173            NumericEntityEscaper.between(0x86, 0x9f),
174            new UnicodeUnpairedSurrogateRemover()
175        );
176    
177    /**
178     * Translator object for escaping XML 1.1.
179     * 
180     * While {@link #escapeXml11(String)} is the expected method of use, this
181     * object allows the XML escaping functionality to be used
182     * as the foundation for a custom translator.
183     *
184     * @since 3.3
185     */
186    public static final CharSequenceTranslator ESCAPE_XML11 =
187        new AggregateTranslator(
188            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
189            new LookupTranslator(EntityArrays.APOS_ESCAPE()),
190            new LookupTranslator(
191                    new String[][] {
192                            { "\u0000", "" },
193                            { "\u000b", "&#11;" },
194                            { "\u000c", "&#12;" },
195                            { "\ufffe", "" },
196                            { "\uffff", "" }
197                    }),
198            NumericEntityEscaper.between(0x1, 0x8),
199            NumericEntityEscaper.between(0xe, 0x1f),
200            NumericEntityEscaper.between(0x7f, 0x84),
201            NumericEntityEscaper.between(0x86, 0x9f),
202            new UnicodeUnpairedSurrogateRemover()
203        );
204
205    /**
206     * Translator object for escaping HTML version 3.0.
207     * 
208     * While {@link #escapeHtml3(String)} is the expected method of use, this 
209     * object allows the HTML escaping functionality to be used 
210     * as the foundation for a custom translator. 
211     *
212     * @since 3.0
213     */
214    public static final CharSequenceTranslator ESCAPE_HTML3 = 
215        new AggregateTranslator(
216            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
217            new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE())
218        );
219
220    /**
221     * Translator object for escaping HTML version 4.0.
222     * 
223     * While {@link #escapeHtml4(String)} is the expected method of use, this 
224     * object allows the HTML escaping functionality to be used 
225     * as the foundation for a custom translator. 
226     *
227     * @since 3.0
228     */
229    public static final CharSequenceTranslator ESCAPE_HTML4 = 
230        new AggregateTranslator(
231            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
232            new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()),
233            new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE())
234        );
235
236    /**
237     * Translator object for escaping individual Comma Separated Values. 
238     * 
239     * While {@link #escapeCsv(String)} is the expected method of use, this 
240     * object allows the CSV escaping functionality to be used 
241     * as the foundation for a custom translator. 
242     *
243     * @since 3.0
244     */
245    public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper();
246
247    // TODO: Create a parent class - 'SinglePassTranslator' ?
248    //       It would handle the index checking + length returning, 
249    //       and could also have an optimization check method.
250    static class CsvEscaper extends CharSequenceTranslator {
251
252        private static final char CSV_DELIMITER = ',';
253        private static final char CSV_QUOTE = '"';
254        private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
255        private static final char[] CSV_SEARCH_CHARS = 
256            new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};
257
258        @Override
259        public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
260
261            if(index != 0) {
262                throw new IllegalStateException("CsvEscaper should never reach the [1] index");
263            }
264
265            if (StringUtils.containsNone(input.toString(), CSV_SEARCH_CHARS)) {
266                out.write(input.toString());
267            } else {
268                out.write(CSV_QUOTE);
269                out.write(StringUtils.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR));
270                out.write(CSV_QUOTE);
271            }
272            return Character.codePointCount(input, 0, input.length());
273        }
274    }
275
276    /* UNESCAPE TRANSLATORS */
277
278    /**
279     * Translator object for unescaping escaped Java. 
280     * 
281     * While {@link #unescapeJava(String)} is the expected method of use, this 
282     * object allows the Java unescaping functionality to be used 
283     * as the foundation for a custom translator. 
284     *
285     * @since 3.0
286     */
287    // TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)?
288    public static final CharSequenceTranslator UNESCAPE_JAVA = 
289        new AggregateTranslator(
290            new OctalUnescaper(),     // .between('\1', '\377'),
291            new UnicodeUnescaper(),
292            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()),
293            new LookupTranslator(
294                      new String[][] { 
295                            {"\\\\", "\\"},
296                            {"\\\"", "\""},
297                            {"\\'", "'"},
298                            {"\\", ""}
299                      })
300        );
301
302    /**
303     * Translator object for unescaping escaped EcmaScript. 
304     * 
305     * While {@link #unescapeEcmaScript(String)} is the expected method of use, this 
306     * object allows the EcmaScript unescaping functionality to be used 
307     * as the foundation for a custom translator. 
308     *
309     * @since 3.0
310     */
311    public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA;
312
313    /**
314     * Translator object for unescaping escaped Json.
315     *
316     * While {@link #unescapeJson(String)} is the expected method of use, this
317     * object allows the Json unescaping functionality to be used
318     * as the foundation for a custom translator.
319     *
320     * @since 3.2
321     */
322    public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA;
323
324    /**
325     * Translator object for unescaping escaped HTML 3.0. 
326     * 
327     * While {@link #unescapeHtml3(String)} is the expected method of use, this 
328     * object allows the HTML unescaping functionality to be used 
329     * as the foundation for a custom translator. 
330     *
331     * @since 3.0
332     */
333    public static final CharSequenceTranslator UNESCAPE_HTML3 = 
334        new AggregateTranslator(
335            new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
336            new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
337            new NumericEntityUnescaper()
338        );
339
340    /**
341     * Translator object for unescaping escaped HTML 4.0. 
342     * 
343     * While {@link #unescapeHtml4(String)} is the expected method of use, this 
344     * object allows the HTML unescaping functionality to be used 
345     * as the foundation for a custom translator. 
346     *
347     * @since 3.0
348     */
349    public static final CharSequenceTranslator UNESCAPE_HTML4 = 
350        new AggregateTranslator(
351            new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
352            new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
353            new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()),
354            new NumericEntityUnescaper()
355        );
356
357    /**
358     * Translator object for unescaping escaped XML.
359     * 
360     * While {@link #unescapeXml(String)} is the expected method of use, this 
361     * object allows the XML unescaping functionality to be used 
362     * as the foundation for a custom translator. 
363     *
364     * @since 3.0
365     */
366    public static final CharSequenceTranslator UNESCAPE_XML = 
367        new AggregateTranslator(
368            new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
369            new LookupTranslator(EntityArrays.APOS_UNESCAPE()),
370            new NumericEntityUnescaper()
371        );
372
373    /**
374     * Translator object for unescaping escaped Comma Separated Value entries.
375     * 
376     * While {@link #unescapeCsv(String)} is the expected method of use, this 
377     * object allows the CSV unescaping functionality to be used 
378     * as the foundation for a custom translator. 
379     *
380     * @since 3.0
381     */
382    public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper();
383
384    static class CsvUnescaper extends CharSequenceTranslator {
385
386        private static final char CSV_DELIMITER = ',';
387        private static final char CSV_QUOTE = '"';
388        private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
389        private static final char[] CSV_SEARCH_CHARS = 
390            new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};
391
392        @Override
393        public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
394
395            if(index != 0) {
396                throw new IllegalStateException("CsvUnescaper should never reach the [1] index");
397            }
398
399            if ( input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE ) {
400                out.write(input.toString());
401                return Character.codePointCount(input, 0, input.length());
402            }
403
404            // strip quotes
405            final String quoteless = input.subSequence(1, input.length() - 1).toString();
406
407            if ( StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS) ) {
408                // deal with escaped quotes; ie) ""
409                out.write(StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR));
410            } else {
411                out.write(input.toString());
412            }
413            return Character.codePointCount(input, 0, input.length());
414        }
415    }
416
417    /* Helper functions */
418
419    /**
420     * <p>{@code StringEscapeUtils} instances should NOT be constructed in
421     * standard programming.</p>
422     *
423     * <p>Instead, the class should be used as:</p>
424     * <pre>StringEscapeUtils.escapeJava("foo");</pre>
425     *
426     * <p>This constructor is public to permit tools that require a JavaBean
427     * instance to operate.</p>
428     */
429    public StringEscapeUtils() {
430      super();
431    }
432
433    // Java and JavaScript
434    //--------------------------------------------------------------------------
435    /**
436     * <p>Escapes the characters in a {@code String} using Java String rules.</p>
437     *
438     * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
439     *
440     * <p>So a tab becomes the characters {@code '\\'} and
441     * {@code 't'}.</p>
442     *
443     * <p>The only difference between Java strings and JavaScript strings
444     * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p>
445     *
446     * <p>Example:</p>
447     * <pre>
448     * input string: He didn't say, "Stop!"
449     * output string: He didn't say, \"Stop!\"
450     * </pre>
451     *
452     * @param input  String to escape values in, may be null
453     * @return String with escaped values, {@code null} if null string input
454     */
455    public static final String escapeJava(final String input) {
456        return ESCAPE_JAVA.translate(input);
457    }
458
459    /**
460     * <p>Escapes the characters in a {@code String} using EcmaScript String rules.</p>
461     * <p>Escapes any values it finds into their EcmaScript String form.
462     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
463     *
464     * <p>So a tab becomes the characters {@code '\\'} and
465     * {@code 't'}.</p>
466     *
467     * <p>The only difference between Java strings and EcmaScript strings
468     * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p>
469     *
470     * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects. </p>
471     *
472     * <p>Example:</p>
473     * <pre>
474     * input string: He didn't say, "Stop!"
475     * output string: He didn\'t say, \"Stop!\"
476     * </pre>
477     *
478     * @param input  String to escape values in, may be null
479     * @return String with escaped values, {@code null} if null string input
480     *
481     * @since 3.0
482     */
483    public static final String escapeEcmaScript(final String input) {
484        return ESCAPE_ECMASCRIPT.translate(input);
485    }
486
487    /**
488     * <p>Escapes the characters in a {@code String} using Json String rules.</p>
489     * <p>Escapes any values it finds into their Json String form.
490     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
491     *
492     * <p>So a tab becomes the characters {@code '\\'} and
493     * {@code 't'}.</p>
494     *
495     * <p>The only difference between Java strings and Json strings
496     * is that in Json, forward-slash (/) is escaped.</p>
497     *
498     * <p>See http://www.ietf.org/rfc/rfc4627.txt for further details. </p>
499     *
500     * <p>Example:</p>
501     * <pre>
502     * input string: He didn't say, "Stop!"
503     * output string: He didn't say, \"Stop!\"
504     * </pre>
505     *
506     * @param input  String to escape values in, may be null
507     * @return String with escaped values, {@code null} if null string input
508     *
509     * @since 3.2
510     */
511    public static final String escapeJson(final String input) {
512        return ESCAPE_JSON.translate(input);
513    }
514
515    /**
516     * <p>Unescapes any Java literals found in the {@code String}.
517     * For example, it will turn a sequence of {@code '\'} and
518     * {@code 'n'} into a newline character, unless the {@code '\'}
519     * is preceded by another {@code '\'}.</p>
520     * 
521     * @param input  the {@code String} to unescape, may be null
522     * @return a new unescaped {@code String}, {@code null} if null string input
523     */
524    public static final String unescapeJava(final String input) {
525        return UNESCAPE_JAVA.translate(input);
526    }
527
528    /**
529     * <p>Unescapes any EcmaScript literals found in the {@code String}.</p>
530     *
531     * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
532     * into a newline character, unless the {@code '\'} is preceded by another
533     * {@code '\'}.</p>
534     *
535     * @see #unescapeJava(String)
536     * @param input  the {@code String} to unescape, may be null
537     * @return A new unescaped {@code String}, {@code null} if null string input
538     *
539     * @since 3.0
540     */
541    public static final String unescapeEcmaScript(final String input) {
542        return UNESCAPE_ECMASCRIPT.translate(input);
543    }
544
545    /**
546     * <p>Unescapes any Json literals found in the {@code String}.</p>
547     *
548     * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
549     * into a newline character, unless the {@code '\'} is preceded by another
550     * {@code '\'}.</p>
551     *
552     * @see #unescapeJava(String)
553     * @param input  the {@code String} to unescape, may be null
554     * @return A new unescaped {@code String}, {@code null} if null string input
555     *
556     * @since 3.2
557     */
558    public static final String unescapeJson(final String input) {
559        return UNESCAPE_JSON.translate(input);
560    }
561
562    // HTML and XML
563    //--------------------------------------------------------------------------
564    /**
565     * <p>Escapes the characters in a {@code String} using HTML entities.</p>
566     *
567     * <p>
568     * For example:
569     * </p> 
570     * <p><code>"bread" &amp; "butter"</code></p>
571     * becomes:
572     * <p>
573     * <code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code>.
574     * </p>
575     *
576     * <p>Supports all known HTML 4.0 entities, including funky accents.
577     * Note that the commonly used apostrophe escape character (&amp;apos;)
578     * is not a legal entity and so is not supported). </p>
579     *
580     * @param input  the {@code String} to escape, may be null
581     * @return a new escaped {@code String}, {@code null} if null string input
582     * 
583     * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
584     * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
585     * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
586     * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
587     * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
588     * 
589     * @since 3.0
590     */
591    public static final String escapeHtml4(final String input) {
592        return ESCAPE_HTML4.translate(input);
593    }
594
595    /**
596     * <p>Escapes the characters in a {@code String} using HTML entities.</p>
597     * <p>Supports only the HTML 3.0 entities. </p>
598     *
599     * @param input  the {@code String} to escape, may be null
600     * @return a new escaped {@code String}, {@code null} if null string input
601     * 
602     * @since 3.0
603     */
604    public static final String escapeHtml3(final String input) {
605        return ESCAPE_HTML3.translate(input);
606    }
607
608    //-----------------------------------------------------------------------
609    /**
610     * <p>Unescapes a string containing entity escapes to a string
611     * containing the actual Unicode characters corresponding to the
612     * escapes. Supports HTML 4.0 entities.</p>
613     *
614     * <p>For example, the string {@code "&lt;Fran&ccedil;ais&gt;"}
615     * will become {@code "<Français>"}</p>
616     *
617     * <p>If an entity is unrecognized, it is left alone, and inserted
618     * verbatim into the result string. e.g. {@code "&gt;&zzzz;x"} will
619     * become {@code ">&zzzz;x"}.</p>
620     *
621     * @param input  the {@code String} to unescape, may be null
622     * @return a new unescaped {@code String}, {@code null} if null string input
623     * 
624     * @since 3.0
625     */
626    public static final String unescapeHtml4(final String input) {
627        return UNESCAPE_HTML4.translate(input);
628    }
629
630    /**
631     * <p>Unescapes a string containing entity escapes to a string
632     * containing the actual Unicode characters corresponding to the
633     * escapes. Supports only HTML 3.0 entities.</p>
634     *
635     * @param input  the {@code String} to unescape, may be null
636     * @return a new unescaped {@code String}, {@code null} if null string input
637     * 
638     * @since 3.0
639     */
640    public static final String unescapeHtml3(final String input) {
641        return UNESCAPE_HTML3.translate(input);
642    }
643
644    //-----------------------------------------------------------------------
645    /**
646     * <p>Escapes the characters in a {@code String} using XML entities.</p>
647     *
648     * <p>For example: {@code "bread" & "butter"} =&gt;
649     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
650     * </p>
651     *
652     * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
653     * Does not support DTDs or external entities.</p>
654     *
655     * <p>Note that Unicode characters greater than 0x7f are as of 3.0, no longer 
656     *    escaped. If you still wish this functionality, you can achieve it 
657     *    via the following: 
658     * {@code StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE) );}</p>
659     *
660     * @param input  the {@code String} to escape, may be null
661     * @return a new escaped {@code String}, {@code null} if null string input
662     * @see #unescapeXml(java.lang.String)
663     * @deprecated use {@link #escapeXml10(java.lang.String)} or {@link #escapeXml11(java.lang.String)} instead.
664     */
665    @Deprecated
666    public static final String escapeXml(final String input) {
667        return ESCAPE_XML.translate(input);
668    }
669
670    /**
671     * <p>Escapes the characters in a {@code String} using XML entities.</p>
672     *
673     * <p>For example: {@code "bread" & "butter"} =&gt;
674     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
675     * </p>
676     *
677     * <p>Note that XML 1.0 is a text-only format: it cannot represent control
678     * characters or unpaired Unicode surrogate codepoints, even after escaping.
679     * {@code escapeXml10} will remove characters that do not fit in the
680     * following ranges:</p>
681     * 
682     * <p>{@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
683     * 
684     * <p>Though not strictly necessary, {@code escapeXml10} will escape
685     * characters in the following ranges:</p>
686     * 
687     * <p>{@code [#x7F-#x84] | [#x86-#x9F]}</p>
688     * 
689     * <p>The returned string can be inserted into a valid XML 1.0 or XML 1.1
690     * document. If you want to allow more non-text characters in an XML 1.1
691     * document, use {@link #escapeXml11(String)}.</p>
692     *
693     * @param input  the {@code String} to escape, may be null
694     * @return a new escaped {@code String}, {@code null} if null string input
695     * @see #unescapeXml(java.lang.String)
696     * @since 3.3
697     */
698    public static String escapeXml10(final String input) {
699        return ESCAPE_XML10.translate(input);
700    }
701    
702    /**
703     * <p>Escapes the characters in a {@code String} using XML entities.</p>
704     *
705     * <p>For example: {@code "bread" & "butter"} =&gt;
706     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
707     * </p>
708     *
709     * <p>XML 1.1 can represent certain control characters, but it cannot represent
710     * the null byte or unpaired Unicode surrogate codepoints, even after escaping.
711     * {@code escapeXml11} will remove characters that do not fit in the following
712     * ranges:</p>
713     * 
714     * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
715     * 
716     * <p>{@code escapeXml11} will escape characters in the following ranges:</p>
717     * 
718     * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p>
719     * 
720     * <p>The returned string can be inserted into a valid XML 1.1 document. Do not
721     * use it for XML 1.0 documents.</p>
722     *
723     * @param input  the {@code String} to escape, may be null
724     * @return a new escaped {@code String}, {@code null} if null string input
725     * @see #unescapeXml(java.lang.String)
726     * @since 3.3
727     */
728    public static String escapeXml11(final String input) {
729        return ESCAPE_XML11.translate(input);
730    }
731
732    //-----------------------------------------------------------------------
733    /**
734     * <p>Unescapes a string containing XML entity escapes to a string
735     * containing the actual Unicode characters corresponding to the
736     * escapes.</p>
737     *
738     * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
739     * Does not support DTDs or external entities.</p>
740     *
741     * <p>Note that numerical \\u Unicode codes are unescaped to their respective 
742     *    Unicode characters. This may change in future releases. </p>
743     *
744     * @param input  the {@code String} to unescape, may be null
745     * @return a new unescaped {@code String}, {@code null} if null string input
746     * @see #escapeXml(String)
747     * @see #escapeXml10(String)
748     * @see #escapeXml11(String)
749     */
750    public static final String unescapeXml(final String input) {
751        return UNESCAPE_XML.translate(input);
752    }
753
754    //-----------------------------------------------------------------------
755
756    /**
757     * <p>Returns a {@code String} value for a CSV column enclosed in double quotes,
758     * if required.</p>
759     *
760     * <p>If the value contains a comma, newline or double quote, then the
761     *    String value is returned enclosed in double quotes.</p>
762     *
763     * <p>Any double quote characters in the value are escaped with another double quote.</p>
764     *
765     * <p>If the value does not contain a comma, newline or double quote, then the
766     *    String value is returned unchanged.</p>
767     *
768     * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
769     * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
770     *
771     * @param input the input CSV column String, may be null
772     * @return the input String, enclosed in double quotes if the value contains a comma,
773     * newline or double quote, {@code null} if null string input
774     * @since 2.4
775     */
776    public static final String escapeCsv(final String input) {
777        return ESCAPE_CSV.translate(input);
778    }
779
780    /**
781     * <p>Returns a {@code String} value for an unescaped CSV column. </p>
782     *
783     * <p>If the value is enclosed in double quotes, and contains a comma, newline 
784     *    or double quote, then quotes are removed. 
785     * </p>
786     *
787     * <p>Any double quote escaped characters (a pair of double quotes) are unescaped 
788     *    to just one double quote. </p>
789     *
790     * <p>If the value is not enclosed in double quotes, or is and does not contain a 
791     *    comma, newline or double quote, then the String value is returned unchanged.</p>
792     *
793     * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
794     * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
795     *
796     * @param input the input CSV column String, may be null
797     * @return the input String, with enclosing double quotes removed and embedded double 
798     * quotes unescaped, {@code null} if null string input
799     * @since 2.4
800     */
801    public static final String unescapeCsv(final String input) {
802        return UNESCAPE_CSV.translate(input);
803    }
804
805}