001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 * 
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 * 
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.lang3;
018
019import java.io.IOException;
020import java.io.Writer;
021
022import org.apache.commons.lang3.text.translate.AggregateTranslator;
023import org.apache.commons.lang3.text.translate.CharSequenceTranslator;
024import org.apache.commons.lang3.text.translate.EntityArrays;
025import org.apache.commons.lang3.text.translate.JavaUnicodeEscaper;
026import org.apache.commons.lang3.text.translate.LookupTranslator;
027import org.apache.commons.lang3.text.translate.NumericEntityEscaper;
028import org.apache.commons.lang3.text.translate.NumericEntityUnescaper;
029import org.apache.commons.lang3.text.translate.OctalUnescaper;
030import org.apache.commons.lang3.text.translate.UnicodeUnescaper;
031import org.apache.commons.lang3.text.translate.UnicodeUnpairedSurrogateRemover;
032
033/**
 * <p>Escapes and unescapes {@code String}s for
 * Java, JavaScript (EcmaScript), JSON, HTML, XML and CSV.</p>
036 *
037 * <p>#ThreadSafe#</p>
038 * @since 2.0
039 * @version $Id: StringEscapeUtils.java 1573186 2014-03-01 15:53:17Z sebb $
040 */
041public class StringEscapeUtils {
042
043    /* ESCAPE TRANSLATORS */
044
045    /**
046     * Translator object for escaping Java. 
047     * 
048     * While {@link #escapeJava(String)} is the expected method of use, this 
049     * object allows the Java escaping functionality to be used 
050     * as the foundation for a custom translator. 
051     *
052     * @since 3.0
053     */
054    public static final CharSequenceTranslator ESCAPE_JAVA = 
055          new LookupTranslator(
056            new String[][] { 
057              {"\"", "\\\""},
058              {"\\", "\\\\"},
059          }).with(
060            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE())
061          ).with(
062            JavaUnicodeEscaper.outsideOf(32, 0x7f) 
063        );
064
065    /**
066     * Translator object for escaping EcmaScript/JavaScript. 
067     * 
068     * While {@link #escapeEcmaScript(String)} is the expected method of use, this 
069     * object allows the EcmaScript escaping functionality to be used 
070     * as the foundation for a custom translator. 
071     *
072     * @since 3.0
073     */
074    public static final CharSequenceTranslator ESCAPE_ECMASCRIPT = 
075        new AggregateTranslator(
076            new LookupTranslator(
077                      new String[][] { 
078                            {"'", "\\'"},
079                            {"\"", "\\\""},
080                            {"\\", "\\\\"},
081                            {"/", "\\/"}
082                      }),
083            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
084            JavaUnicodeEscaper.outsideOf(32, 0x7f) 
085        );
086
087    /**
088     * Translator object for escaping Json.
089     *
090     * While {@link #escapeJson(String)} is the expected method of use, this
091     * object allows the Json escaping functionality to be used
092     * as the foundation for a custom translator.
093     *
094     * @since 3.2
095     */
096    public static final CharSequenceTranslator ESCAPE_JSON =
097        new AggregateTranslator(
098            new LookupTranslator(
099                      new String[][] {
100                            {"\"", "\\\""},
101                            {"\\", "\\\\"},
102                            {"/", "\\/"}
103                      }),
104            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
105            JavaUnicodeEscaper.outsideOf(32, 0x7f)
106        );
107
108    /**
109     * Translator object for escaping XML.
110     * 
111     * While {@link #escapeXml(String)} is the expected method of use, this 
112     * object allows the XML escaping functionality to be used 
113     * as the foundation for a custom translator. 
114     *
115     * @since 3.0
116     * @deprecated use {@link #ESCAPE_XML10} or {@link #ESCAPE_XML11} instead.
117     */
118    @Deprecated
119    public static final CharSequenceTranslator ESCAPE_XML = 
120        new AggregateTranslator(
121            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
122            new LookupTranslator(EntityArrays.APOS_ESCAPE())
123        );
124    
125    /**
126     * Translator object for escaping XML 1.0.
127     * 
128     * While {@link #escapeXml10(String)} is the expected method of use, this
129     * object allows the XML escaping functionality to be used
130     * as the foundation for a custom translator.
131     *
132     * @since 3.3
133     */
134    public static final CharSequenceTranslator ESCAPE_XML10 =
135        new AggregateTranslator(
136            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
137            new LookupTranslator(EntityArrays.APOS_ESCAPE()),
138            new LookupTranslator(
139                    new String[][] {
140                            { "\u0000", "" },
141                            { "\u0001", "" },
142                            { "\u0002", "" },
143                            { "\u0003", "" },
144                            { "\u0004", "" },
145                            { "\u0005", "" },
146                            { "\u0006", "" },
147                            { "\u0007", "" },
148                            { "\u0008", "" },
149                            { "\u000b", "" },
150                            { "\u000c", "" },
151                            { "\u000e", "" },
152                            { "\u000f", "" },
153                            { "\u0010", "" },
154                            { "\u0011", "" },
155                            { "\u0012", "" },
156                            { "\u0013", "" },
157                            { "\u0014", "" },
158                            { "\u0015", "" },
159                            { "\u0016", "" },
160                            { "\u0017", "" },
161                            { "\u0018", "" },
162                            { "\u0019", "" },
163                            { "\u001a", "" },
164                            { "\u001b", "" },
165                            { "\u001c", "" },
166                            { "\u001d", "" },
167                            { "\u001e", "" },
168                            { "\u001f", "" },
169                            { "\ufffe", "" },
170                            { "\uffff", "" }
171                    }),
172            NumericEntityEscaper.between(0x7f, 0x84),
173            NumericEntityEscaper.between(0x86, 0x9f),
174            new UnicodeUnpairedSurrogateRemover()
175        );
176    
177    /**
178     * Translator object for escaping XML 1.1.
179     * 
180     * While {@link #escapeXml11(String)} is the expected method of use, this
181     * object allows the XML escaping functionality to be used
182     * as the foundation for a custom translator.
183     *
184     * @since 3.3
185     */
186    public static final CharSequenceTranslator ESCAPE_XML11 =
187        new AggregateTranslator(
188            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
189            new LookupTranslator(EntityArrays.APOS_ESCAPE()),
190            new LookupTranslator(
191                    new String[][] {
192                            { "\u0000", "" },
193                            { "\u000b", "&#11;" },
194                            { "\u000c", "&#12;" },
195                            { "\ufffe", "" },
196                            { "\uffff", "" }
197                    }),
198            NumericEntityEscaper.between(0x1, 0x8),
199            NumericEntityEscaper.between(0xe, 0x1f),
200            NumericEntityEscaper.between(0x7f, 0x84),
201            NumericEntityEscaper.between(0x86, 0x9f),
202            new UnicodeUnpairedSurrogateRemover()
203        );
204
205    /**
206     * Translator object for escaping HTML version 3.0.
207     * 
208     * While {@link #escapeHtml3(String)} is the expected method of use, this 
209     * object allows the HTML escaping functionality to be used 
210     * as the foundation for a custom translator. 
211     *
212     * @since 3.0
213     */
214    public static final CharSequenceTranslator ESCAPE_HTML3 = 
215        new AggregateTranslator(
216            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
217            new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE())
218        );
219
220    /**
221     * Translator object for escaping HTML version 4.0.
222     * 
223     * While {@link #escapeHtml4(String)} is the expected method of use, this 
224     * object allows the HTML escaping functionality to be used 
225     * as the foundation for a custom translator. 
226     *
227     * @since 3.0
228     */
229    public static final CharSequenceTranslator ESCAPE_HTML4 = 
230        new AggregateTranslator(
231            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
232            new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()),
233            new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE())
234        );
235
236    /**
237     * Translator object for escaping individual Comma Separated Values. 
238     * 
239     * While {@link #escapeCsv(String)} is the expected method of use, this 
240     * object allows the CSV escaping functionality to be used 
241     * as the foundation for a custom translator. 
242     *
243     * @since 3.0
244     */
245    public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper();
246
247    // TODO: Create a parent class - 'SinglePassTranslator' ?
248    //       It would handle the index checking + length returning, 
249    //       and could also have an optimization check method.
250    static class CsvEscaper extends CharSequenceTranslator {
251
252        private static final char CSV_DELIMITER = ',';
253        private static final char CSV_QUOTE = '"';
254        private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
255        private static final char[] CSV_SEARCH_CHARS = 
256            new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};
257
258        @Override
259        public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
260
261            if(index != 0) {
262                throw new IllegalStateException("CsvEscaper should never reach the [1] index");
263            }
264
265            if (StringUtils.containsNone(input.toString(), CSV_SEARCH_CHARS)) {
266                out.write(input.toString());
267            } else {
268                out.write(CSV_QUOTE);
269                out.write(StringUtils.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR));
270                out.write(CSV_QUOTE);
271            }
272            return Character.codePointCount(input, 0, input.length());
273        }
274    }
275
276    /* UNESCAPE TRANSLATORS */
277
278    /**
279     * Translator object for unescaping escaped Java. 
280     * 
281     * While {@link #unescapeJava(String)} is the expected method of use, this 
282     * object allows the Java unescaping functionality to be used 
283     * as the foundation for a custom translator. 
284     *
285     * @since 3.0
286     */
287    // TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)?
288    public static final CharSequenceTranslator UNESCAPE_JAVA = 
289        new AggregateTranslator(
290            new OctalUnescaper(),     // .between('\1', '\377'),
291            new UnicodeUnescaper(),
292            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()),
293            new LookupTranslator(
294                      new String[][] { 
295                            {"\\\\", "\\"},
296                            {"\\\"", "\""},
297                            {"\\'", "'"},
298                            {"\\", ""}
299                      })
300        );
301
302    /**
303     * Translator object for unescaping escaped EcmaScript. 
304     * 
305     * While {@link #unescapeEcmaScript(String)} is the expected method of use, this 
306     * object allows the EcmaScript unescaping functionality to be used 
307     * as the foundation for a custom translator. 
308     *
309     * @since 3.0
310     */
311    public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA;
312
313    /**
314     * Translator object for unescaping escaped Json.
315     *
316     * While {@link #unescapeJson(String)} is the expected method of use, this
317     * object allows the Json unescaping functionality to be used
318     * as the foundation for a custom translator.
319     *
320     * @since 3.2
321     */
322    public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA;
323
324    /**
325     * Translator object for unescaping escaped HTML 3.0. 
326     * 
327     * While {@link #unescapeHtml3(String)} is the expected method of use, this 
328     * object allows the HTML unescaping functionality to be used 
329     * as the foundation for a custom translator. 
330     *
331     * @since 3.0
332     */
333    public static final CharSequenceTranslator UNESCAPE_HTML3 = 
334        new AggregateTranslator(
335            new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
336            new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
337            new NumericEntityUnescaper()
338        );
339
340    /**
341     * Translator object for unescaping escaped HTML 4.0. 
342     * 
343     * While {@link #unescapeHtml4(String)} is the expected method of use, this 
344     * object allows the HTML unescaping functionality to be used 
345     * as the foundation for a custom translator. 
346     *
347     * @since 3.0
348     */
349    public static final CharSequenceTranslator UNESCAPE_HTML4 = 
350        new AggregateTranslator(
351            new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
352            new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
353            new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()),
354            new NumericEntityUnescaper()
355        );
356
357    /**
358     * Translator object for unescaping escaped XML.
359     * 
360     * While {@link #unescapeXml(String)} is the expected method of use, this 
361     * object allows the XML unescaping functionality to be used 
362     * as the foundation for a custom translator. 
363     *
364     * @since 3.0
365     */
366    public static final CharSequenceTranslator UNESCAPE_XML = 
367        new AggregateTranslator(
368            new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
369            new LookupTranslator(EntityArrays.APOS_UNESCAPE()),
370            new NumericEntityUnescaper()
371        );
372
373    /**
374     * Translator object for unescaping escaped Comma Separated Value entries.
375     * 
376     * While {@link #unescapeCsv(String)} is the expected method of use, this 
377     * object allows the CSV unescaping functionality to be used 
378     * as the foundation for a custom translator. 
379     *
380     * @since 3.0
381     */
382    public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper();
383
384    static class CsvUnescaper extends CharSequenceTranslator {
385
386        private static final char CSV_DELIMITER = ',';
387        private static final char CSV_QUOTE = '"';
388        private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
389        private static final char[] CSV_SEARCH_CHARS = 
390            new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};
391
392        @Override
393        public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
394
395            if(index != 0) {
396                throw new IllegalStateException("CsvUnescaper should never reach the [1] index");
397            }
398
399            if ( input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE ) {
400                out.write(input.toString());
401                return Character.codePointCount(input, 0, input.length());
402            }
403
404            // strip quotes
405            final String quoteless = input.subSequence(1, input.length() - 1).toString();
406
407            if ( StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS) ) {
408                // deal with escaped quotes; ie) ""
409                out.write(StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR));
410            } else {
411                out.write(input.toString());
412            }
413            return Character.codePointCount(input, 0, input.length());
414        }
415    }
416
417    /* Helper functions */
418
419    /**
420     * <p>{@code StringEscapeUtils} instances should NOT be constructed in
421     * standard programming.</p>
422     *
423     * <p>Instead, the class should be used as:
424     * <pre>StringEscapeUtils.escapeJava("foo");</pre></p>
425     *
426     * <p>This constructor is public to permit tools that require a JavaBean
427     * instance to operate.</p>
428     */
429    public StringEscapeUtils() {
430      super();
431    }
432
433    // Java and JavaScript
434    //--------------------------------------------------------------------------
435    /**
436     * <p>Escapes the characters in a {@code String} using Java String rules.</p>
437     *
438     * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
439     *
440     * <p>So a tab becomes the characters {@code '\\'} and
441     * {@code 't'}.</p>
442     *
443     * <p>The only difference between Java strings and JavaScript strings
444     * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p>
445     *
446     * <p>Example:
447     * <pre>
448     * input string: He didn't say, "Stop!"
449     * output string: He didn't say, \"Stop!\"
450     * </pre>
451     * </p>
452     *
453     * @param input  String to escape values in, may be null
454     * @return String with escaped values, {@code null} if null string input
455     */
456    public static final String escapeJava(final String input) {
457        return ESCAPE_JAVA.translate(input);
458    }
459
460    /**
461     * <p>Escapes the characters in a {@code String} using EcmaScript String rules.</p>
462     * <p>Escapes any values it finds into their EcmaScript String form.
463     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
464     *
465     * <p>So a tab becomes the characters {@code '\\'} and
466     * {@code 't'}.</p>
467     *
468     * <p>The only difference between Java strings and EcmaScript strings
469     * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p>
470     *
471     * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects. </p>
472     *
473     * <p>Example:
474     * <pre>
475     * input string: He didn't say, "Stop!"
476     * output string: He didn\'t say, \"Stop!\"
477     * </pre>
478     * </p>
479     *
480     * @param input  String to escape values in, may be null
481     * @return String with escaped values, {@code null} if null string input
482     *
483     * @since 3.0
484     */
485    public static final String escapeEcmaScript(final String input) {
486        return ESCAPE_ECMASCRIPT.translate(input);
487    }
488
489    /**
490     * <p>Escapes the characters in a {@code String} using Json String rules.</p>
491     * <p>Escapes any values it finds into their Json String form.
492     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
493     *
494     * <p>So a tab becomes the characters {@code '\\'} and
495     * {@code 't'}.</p>
496     *
497     * <p>The only difference between Java strings and Json strings
498     * is that in Json, forward-slash (/) is escaped.</p>
499     *
500     * <p>See http://www.ietf.org/rfc/rfc4627.txt for further details. </p>
501     *
502     * <p>Example:
503     * <pre>
504     * input string: He didn't say, "Stop!"
505     * output string: He didn't say, \"Stop!\"
506     * </pre>
507     * </p>
508     *
509     * @param input  String to escape values in, may be null
510     * @return String with escaped values, {@code null} if null string input
511     *
512     * @since 3.2
513     */
514    public static final String escapeJson(final String input) {
515        return ESCAPE_JSON.translate(input);
516    }
517
518    /**
519     * <p>Unescapes any Java literals found in the {@code String}.
520     * For example, it will turn a sequence of {@code '\'} and
521     * {@code 'n'} into a newline character, unless the {@code '\'}
522     * is preceded by another {@code '\'}.</p>
523     * 
524     * @param input  the {@code String} to unescape, may be null
525     * @return a new unescaped {@code String}, {@code null} if null string input
526     */
527    public static final String unescapeJava(final String input) {
528        return UNESCAPE_JAVA.translate(input);
529    }
530
531    /**
532     * <p>Unescapes any EcmaScript literals found in the {@code String}.</p>
533     *
534     * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
535     * into a newline character, unless the {@code '\'} is preceded by another
536     * {@code '\'}.</p>
537     *
538     * @see #unescapeJava(String)
539     * @param input  the {@code String} to unescape, may be null
540     * @return A new unescaped {@code String}, {@code null} if null string input
541     *
542     * @since 3.0
543     */
544    public static final String unescapeEcmaScript(final String input) {
545        return UNESCAPE_ECMASCRIPT.translate(input);
546    }
547
548    /**
549     * <p>Unescapes any Json literals found in the {@code String}.</p>
550     *
551     * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
552     * into a newline character, unless the {@code '\'} is preceded by another
553     * {@code '\'}.</p>
554     *
555     * @see #unescapeJava(String)
556     * @param input  the {@code String} to unescape, may be null
557     * @return A new unescaped {@code String}, {@code null} if null string input
558     *
559     * @since 3.2
560     */
561    public static final String unescapeJson(final String input) {
562        return UNESCAPE_JSON.translate(input);
563    }
564
565    // HTML and XML
566    //--------------------------------------------------------------------------
567    /**
568     * <p>Escapes the characters in a {@code String} using HTML entities.</p>
569     *
570     * <p>
571     * For example:
572     * </p> 
573     * <p><code>"bread" & "butter"</code></p>
574     * becomes:
575     * <p>
576     * <code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code>.
577     * </p>
578     *
579     * <p>Supports all known HTML 4.0 entities, including funky accents.
580     * Note that the commonly used apostrophe escape character (&amp;apos;)
581     * is not a legal entity and so is not supported). </p>
582     *
583     * @param input  the {@code String} to escape, may be null
584     * @return a new escaped {@code String}, {@code null} if null string input
585     * 
586     * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
587     * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
588     * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
589     * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
590     * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
591     * 
592     * @since 3.0
593     */
594    public static final String escapeHtml4(final String input) {
595        return ESCAPE_HTML4.translate(input);
596    }
597
598    /**
599     * <p>Escapes the characters in a {@code String} using HTML entities.</p>
600     * <p>Supports only the HTML 3.0 entities. </p>
601     *
602     * @param input  the {@code String} to escape, may be null
603     * @return a new escaped {@code String}, {@code null} if null string input
604     * 
605     * @since 3.0
606     */
607    public static final String escapeHtml3(final String input) {
608        return ESCAPE_HTML3.translate(input);
609    }
610
611    //-----------------------------------------------------------------------
612    /**
613     * <p>Unescapes a string containing entity escapes to a string
614     * containing the actual Unicode characters corresponding to the
615     * escapes. Supports HTML 4.0 entities.</p>
616     *
617     * <p>For example, the string "&amp;lt;Fran&amp;ccedil;ais&amp;gt;"
618     * will become "&lt;Fran&ccedil;ais&gt;"</p>
619     *
620     * <p>If an entity is unrecognized, it is left alone, and inserted
621     * verbatim into the result string. e.g. "&amp;gt;&amp;zzzz;x" will
622     * become "&gt;&amp;zzzz;x".</p>
623     *
624     * @param input  the {@code String} to unescape, may be null
625     * @return a new unescaped {@code String}, {@code null} if null string input
626     * 
627     * @since 3.0
628     */
629    public static final String unescapeHtml4(final String input) {
630        return UNESCAPE_HTML4.translate(input);
631    }
632
633    /**
634     * <p>Unescapes a string containing entity escapes to a string
635     * containing the actual Unicode characters corresponding to the
636     * escapes. Supports only HTML 3.0 entities.</p>
637     *
638     * @param input  the {@code String} to unescape, may be null
639     * @return a new unescaped {@code String}, {@code null} if null string input
640     * 
641     * @since 3.0
642     */
643    public static final String unescapeHtml3(final String input) {
644        return UNESCAPE_HTML3.translate(input);
645    }
646
647    //-----------------------------------------------------------------------
648    /**
649     * <p>Escapes the characters in a {@code String} using XML entities.</p>
650     *
651     * <p>For example: <tt>"bread" & "butter"</tt> =>
652     * <tt>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</tt>.
653     * </p>
654     *
655     * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
656     * Does not support DTDs or external entities.</p>
657     *
658     * <p>Note that Unicode characters greater than 0x7f are as of 3.0, no longer 
659     *    escaped. If you still wish this functionality, you can achieve it 
660     *    via the following: 
661     * {@code StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE) );}</p>
662     *
663     * @param input  the {@code String} to escape, may be null
664     * @return a new escaped {@code String}, {@code null} if null string input
665     * @see #unescapeXml(java.lang.String)
666     * @deprecated use {@link #escapeXml10(java.lang.String)} or {@link #escapeXml11(java.lang.String)} instead.
667     */
668    @Deprecated
669    public static final String escapeXml(final String input) {
670        return ESCAPE_XML.translate(input);
671    }
672
673    /**
674     * <p>Escapes the characters in a {@code String} using XML entities.</p>
675     *
676     * <p>For example: <tt>"bread" & "butter"</tt> =>
677     * <tt>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</tt>.
678     * </p>
679     *
680     * <p>Note that XML 1.0 is a text-only format: it cannot represent control
681     * characters or unpaired Unicode surrogate codepoints, even after escaping.
682     * {@code escapeXml10} will remove characters that do not fit in the
683     * following ranges:</p>
684     * 
685     * <p>{@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
686     * 
687     * <p>Though not strictly necessary, {@code escapeXml10} will escape
688     * characters in the following ranges:</p>
689     * 
690     * <p>{@code [#x7F-#x84] | [#x86-#x9F]}</p>
691     * 
692     * <p>The returned string can be inserted into a valid XML 1.0 or XML 1.1
693     * document. If you want to allow more non-text characters in an XML 1.1
694     * document, use {@link #escapeXml11(String)}.</p>
695     *
696     * @param input  the {@code String} to escape, may be null
697     * @return a new escaped {@code String}, {@code null} if null string input
698     * @see #unescapeXml(java.lang.String)
699     * @since 3.3
700     */
701    public static String escapeXml10(final String input) {
702        return ESCAPE_XML10.translate(input);
703    }
704    
705    /**
706     * <p>Escapes the characters in a {@code String} using XML entities.</p>
707     *
708     * <p>For example: <tt>"bread" & "butter"</tt> =>
709     * <tt>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</tt>.
710     * </p>
711     *
712     * <p>XML 1.1 can represent certain control characters, but it cannot represent
713     * the null byte or unpaired Unicode surrogate codepoints, even after escaping.
714     * {@code escapeXml11} will remove characters that do not fit in the following
715     * ranges:</p>
716     * 
717     * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
718     * 
719     * <p>{@code escapeXml11} will escape characters in the following ranges:</p>
720     * 
721     * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p>
722     * 
723     * <p>The returned string can be inserted into a valid XML 1.1 document. Do not
724     * use it for XML 1.0 documents.</p>
725     *
726     * @param input  the {@code String} to escape, may be null
727     * @return a new escaped {@code String}, {@code null} if null string input
728     * @see #unescapeXml(java.lang.String)
729     * @since 3.3
730     */
731    public static String escapeXml11(final String input) {
732        return ESCAPE_XML11.translate(input);
733    }
734
735    //-----------------------------------------------------------------------
736    /**
737     * <p>Unescapes a string containing XML entity escapes to a string
738     * containing the actual Unicode characters corresponding to the
739     * escapes.</p>
740     *
741     * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
742     * Does not support DTDs or external entities.</p>
743     *
744     * <p>Note that numerical \\u Unicode codes are unescaped to their respective 
745     *    Unicode characters. This may change in future releases. </p>
746     *
747     * @param input  the {@code String} to unescape, may be null
748     * @return a new unescaped {@code String}, {@code null} if null string input
749     * @see #escapeXml(String)
750     * @see #escapeXml10(String)
751     * @see #escapeXml11(String)
752     */
753    public static final String unescapeXml(final String input) {
754        return UNESCAPE_XML.translate(input);
755    }
756
757    //-----------------------------------------------------------------------
758
759    /**
760     * <p>Returns a {@code String} value for a CSV column enclosed in double quotes,
761     * if required.</p>
762     *
763     * <p>If the value contains a comma, newline or double quote, then the
764     *    String value is returned enclosed in double quotes.</p>
765     * </p>
766     *
767     * <p>Any double quote characters in the value are escaped with another double quote.</p>
768     *
769     * <p>If the value does not contain a comma, newline or double quote, then the
770     *    String value is returned unchanged.</p>
771     * </p>
772     *
773     * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
774     * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
775     *
776     * @param input the input CSV column String, may be null
777     * @return the input String, enclosed in double quotes if the value contains a comma,
778     * newline or double quote, {@code null} if null string input
779     * @since 2.4
780     */
781    public static final String escapeCsv(final String input) {
782        return ESCAPE_CSV.translate(input);
783    }
784
785    /**
786     * <p>Returns a {@code String} value for an unescaped CSV column. </p>
787     *
788     * <p>If the value is enclosed in double quotes, and contains a comma, newline 
789     *    or double quote, then quotes are removed. 
790     * </p>
791     *
792     * <p>Any double quote escaped characters (a pair of double quotes) are unescaped 
793     *    to just one double quote. </p>
794     *
795     * <p>If the value is not enclosed in double quotes, or is and does not contain a 
796     *    comma, newline or double quote, then the String value is returned unchanged.</p>
797     * </p>
798     *
799     * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
800     * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
801     *
802     * @param input the input CSV column String, may be null
803     * @return the input String, with enclosing double quotes removed and embedded double 
804     * quotes unescaped, {@code null} if null string input
805     * @since 2.4
806     */
807    public static final String unescapeCsv(final String input) {
808        return UNESCAPE_CSV.translate(input);
809    }
810
811}