1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.lang3;
18
19 import java.io.IOException;
20 import java.io.Writer;
21
22 import org.apache.commons.lang3.text.translate.AggregateTranslator;
23 import org.apache.commons.lang3.text.translate.CharSequenceTranslator;
24 import org.apache.commons.lang3.text.translate.EntityArrays;
25 import org.apache.commons.lang3.text.translate.JavaUnicodeEscaper;
26 import org.apache.commons.lang3.text.translate.LookupTranslator;
27 import org.apache.commons.lang3.text.translate.NumericEntityEscaper;
28 import org.apache.commons.lang3.text.translate.NumericEntityUnescaper;
29 import org.apache.commons.lang3.text.translate.OctalUnescaper;
30 import org.apache.commons.lang3.text.translate.UnicodeUnescaper;
31 import org.apache.commons.lang3.text.translate.UnicodeUnpairedSurrogateRemover;
32
33 /**
 * Escapes and unescapes {@link String}s for
 * Java, JavaScript (EcmaScript), JSON, CSV, HTML and XML.
36 *
37 * <p>#ThreadSafe#</p>
38 * @since 2.0
39 * @deprecated As of 3.6, use Apache Commons Text
40 * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/StringEscapeUtils.html">
41 * StringEscapeUtils</a> instead.
42 */
43 @Deprecated
44 public class StringEscapeUtils {
45
46 /* ESCAPE TRANSLATORS */
47
48 private static final class CsvEscaper extends CharSequenceTranslator {
49
50 private static final char CSV_DELIMITER = ',';
51 private static final char CSV_QUOTE = '"';
52 private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
53 private static final char[] CSV_SEARCH_CHARS = { CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF };
54
55 @Override
56 public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
57 if (index != 0) {
58 throw new IllegalStateException("CsvEscaper should never reach the [1] index");
59 }
60 if (StringUtils.containsNone(input.toString(), CSV_SEARCH_CHARS)) {
61 out.write(input.toString());
62 } else {
63 out.write(CSV_QUOTE);
64 out.write(Strings.CS.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR));
65 out.write(CSV_QUOTE);
66 }
67 return Character.codePointCount(input, 0, input.length());
68 }
69 }
70
71 private static final class CsvUnescaper extends CharSequenceTranslator {
72
73 private static final char CSV_DELIMITER = ',';
74 private static final char CSV_QUOTE = '"';
75 private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
76 private static final char[] CSV_SEARCH_CHARS = {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};
77
78 @Override
79 public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
80 if (index != 0) {
81 throw new IllegalStateException("CsvUnescaper should never reach the [1] index");
82 }
83 if (input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE) {
84 out.write(input.toString());
85 return Character.codePointCount(input, 0, input.length());
86 }
87 // strip quotes
88 final String quoteless = input.subSequence(1, input.length() - 1).toString();
89 if (StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS)) {
90 // deal with escaped quotes; ie) ""
91 out.write(Strings.CS.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR));
92 } else {
93 out.write(input.toString());
94 }
95 return Character.codePointCount(input, 0, input.length());
96 }
97 }
98
99 /**
100 * Translator object for escaping Java.
101 *
102 * While {@link #escapeJava(String)} is the expected method of use, this
103 * object allows the Java escaping functionality to be used
104 * as the foundation for a custom translator.
105 *
106 * @since 3.0
107 */
108 public static final CharSequenceTranslator ESCAPE_JAVA =
109 new LookupTranslator(
110 new String[][] {
111 {"\"", "\\\""},
112 {"\\", "\\\\"},
113 }).with(
114 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE())
115 ).with(
116 JavaUnicodeEscaper.outsideOf(32, 0x7f)
117 );
118
/**
 * Translator object for escaping EcmaScript/JavaScript.
 *
 * <p>While {@link #escapeEcmaScript(String)} is the expected method of use, this
 * object allows the EcmaScript escaping functionality to be used
 * as the foundation for a custom translator.</p>
 *
 * <p>The lookup table escapes the single quote, the double quote, the backslash
 * and the forward slash.</p>
 *
 * @since 3.0
 */
public static final CharSequenceTranslator ESCAPE_ECMASCRIPT =
    new AggregateTranslator(
        new LookupTranslator(
            new String[][] {
                {"'", "\\'"},
                {"\"", "\\\""},
                {"\\", "\\\\"},
                {"/", "\\/"}
            }),
        new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
        // code points outside the range 32..0x7f
        JavaUnicodeEscaper.outsideOf(32, 0x7f)
    );
140
/**
 * Translator object for escaping Json.
 *
 * <p>While {@link #escapeJson(String)} is the expected method of use, this
 * object allows the Json escaping functionality to be used
 * as the foundation for a custom translator.</p>
 *
 * <p>The lookup table escapes the double quote, the backslash and the forward slash.</p>
 *
 * @since 3.2
 */
public static final CharSequenceTranslator ESCAPE_JSON =
    new AggregateTranslator(
        new LookupTranslator(
            new String[][] {
                {"\"", "\\\""},
                {"\\", "\\\\"},
                {"/", "\\/"}
            }),
        new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
        // code points outside the range 32..0x7f
        JavaUnicodeEscaper.outsideOf(32, 0x7f)
    );
161
/**
 * Translator object for escaping XML.
 *
 * <p>While {@link #escapeXml(String)} is the expected method of use, this
 * object allows the XML escaping functionality to be used
 * as the foundation for a custom translator.</p>
 *
 * <p>Combines the {@code BASIC_ESCAPE} and {@code APOS_ESCAPE} entity tables only.</p>
 *
 * @since 3.0
 * @deprecated Use {@link #ESCAPE_XML10} or {@link #ESCAPE_XML11} instead.
 */
@Deprecated
public static final CharSequenceTranslator ESCAPE_XML =
    new AggregateTranslator(
        new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
        new LookupTranslator(EntityArrays.APOS_ESCAPE())
    );
178
/**
 * Translator object for escaping XML 1.0.
 *
 * <p>While {@link #escapeXml10(String)} is the expected method of use, this
 * object allows the XML escaping functionality to be used
 * as the foundation for a custom translator.</p>
 *
 * @since 3.3
 */
public static final CharSequenceTranslator ESCAPE_XML10 =
    new AggregateTranslator(
        new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
        new LookupTranslator(EntityArrays.APOS_ESCAPE()),
        // Characters mapped to the empty string are removed from the output.
        // U+0009, U+000A and U+000D are deliberately absent from this table:
        // escapeXml10 documents them as allowed characters.
        new LookupTranslator(
            new String[][] {
                { "\u0000", StringUtils.EMPTY },
                { "\u0001", StringUtils.EMPTY },
                { "\u0002", StringUtils.EMPTY },
                { "\u0003", StringUtils.EMPTY },
                { "\u0004", StringUtils.EMPTY },
                { "\u0005", StringUtils.EMPTY },
                { "\u0006", StringUtils.EMPTY },
                { "\u0007", StringUtils.EMPTY },
                { "\u0008", StringUtils.EMPTY },
                { "\u000b", StringUtils.EMPTY },
                { "\u000c", StringUtils.EMPTY },
                { "\u000e", StringUtils.EMPTY },
                { "\u000f", StringUtils.EMPTY },
                { "\u0010", StringUtils.EMPTY },
                { "\u0011", StringUtils.EMPTY },
                { "\u0012", StringUtils.EMPTY },
                { "\u0013", StringUtils.EMPTY },
                { "\u0014", StringUtils.EMPTY },
                { "\u0015", StringUtils.EMPTY },
                { "\u0016", StringUtils.EMPTY },
                { "\u0017", StringUtils.EMPTY },
                { "\u0018", StringUtils.EMPTY },
                { "\u0019", StringUtils.EMPTY },
                { "\u001a", StringUtils.EMPTY },
                { "\u001b", StringUtils.EMPTY },
                { "\u001c", StringUtils.EMPTY },
                { "\u001d", StringUtils.EMPTY },
                { "\u001e", StringUtils.EMPTY },
                { "\u001f", StringUtils.EMPTY },
                { "\ufffe", StringUtils.EMPTY },
                { "\uffff", StringUtils.EMPTY }
            }),
        // Ranges 0x7f..0x84 and 0x86..0x9f are handed to the numeric entity escaper; 0x85 is in neither range.
        NumericEntityEscaper.between(0x7f, 0x84),
        NumericEntityEscaper.between(0x86, 0x9f),
        new UnicodeUnpairedSurrogateRemover()
    );
230
231 /**
232 * Translator object for escaping XML 1.1.
233 *
234 * While {@link #escapeXml11(String)} is the expected method of use, this
235 * object allows the XML escaping functionality to be used
236 * as the foundation for a custom translator.
237 *
238 * @since 3.3
239 */
240 public static final CharSequenceTranslator ESCAPE_XML11 =
241 new AggregateTranslator(
242 new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
243 new LookupTranslator(EntityArrays.APOS_ESCAPE()),
244 new LookupTranslator(
245 new String[][] {
246 { "\u0000", StringUtils.EMPTY },
247 { "\u000b", "" },
248 { "\u000c", "" },
249 { "\ufffe", StringUtils.EMPTY },
250 { "\uffff", StringUtils.EMPTY }
251 }),
252 NumericEntityEscaper.between(0x1, 0x8),
253 NumericEntityEscaper.between(0xe, 0x1f),
254 NumericEntityEscaper.between(0x7f, 0x84),
255 NumericEntityEscaper.between(0x86, 0x9f),
256 new UnicodeUnpairedSurrogateRemover()
257 );
258
/**
 * Translator object for escaping HTML version 3.0.
 *
 * <p>While {@link #escapeHtml3(String)} is the expected method of use, this
 * object allows the HTML escaping functionality to be used
 * as the foundation for a custom translator.</p>
 *
 * <p>Combines the {@code BASIC_ESCAPE} and {@code ISO8859_1_ESCAPE} entity tables.</p>
 *
 * @since 3.0
 */
public static final CharSequenceTranslator ESCAPE_HTML3 =
    new AggregateTranslator(
        new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
        new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE())
    );
273
/**
 * Translator object for escaping HTML version 4.0.
 *
 * <p>While {@link #escapeHtml4(String)} is the expected method of use, this
 * object allows the HTML escaping functionality to be used
 * as the foundation for a custom translator.</p>
 *
 * <p>Combines the {@code BASIC_ESCAPE}, {@code ISO8859_1_ESCAPE} and
 * {@code HTML40_EXTENDED_ESCAPE} entity tables.</p>
 *
 * @since 3.0
 */
public static final CharSequenceTranslator ESCAPE_HTML4 =
    new AggregateTranslator(
        new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
        new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()),
        new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE())
    );
289
/**
 * Translator object for escaping individual Comma Separated Values.
 *
 * <p>While {@link #escapeCsv(String)} is the expected method of use, this
 * object allows the CSV escaping functionality to be used
 * as the foundation for a custom translator.</p>
 *
 * @since 3.0
 */
public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper();

/* UNESCAPE TRANSLATORS */
302
/**
 * Translator object for unescaping escaped Java.
 *
 * <p>While {@link #unescapeJava(String)} is the expected method of use, this
 * object allows the Java unescaping functionality to be used
 * as the foundation for a custom translator.</p>
 *
 * @since 3.0
 */
// TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)?
public static final CharSequenceTranslator UNESCAPE_JAVA =
    new AggregateTranslator(
        new OctalUnescaper(),     // .between('\1', '\377'),
        new UnicodeUnescaper(),
        new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()),
        new LookupTranslator(
            new String[][] {
                {"\\\\", "\\"},
                {"\\\"", "\""},
                {"\\'", "'"},
                // a single backslash on its own maps to the empty string
                {"\\", ""}
            })
    );
326
/**
 * Translator object for unescaping escaped EcmaScript.
 *
 * <p>While {@link #unescapeEcmaScript(String)} is the expected method of use, this
 * object allows the EcmaScript unescaping functionality to be used
 * as the foundation for a custom translator.</p>
 *
 * <p>This is the same instance as {@link #UNESCAPE_JAVA}.</p>
 *
 * @since 3.0
 */
public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA;
337
/**
 * Translator object for unescaping escaped Json.
 *
 * <p>While {@link #unescapeJson(String)} is the expected method of use, this
 * object allows the Json unescaping functionality to be used
 * as the foundation for a custom translator.</p>
 *
 * <p>This is the same instance as {@link #UNESCAPE_JAVA}.</p>
 *
 * @since 3.2
 */
public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA;
348
/**
 * Translator object for unescaping escaped HTML 3.0.
 *
 * <p>While {@link #unescapeHtml3(String)} is the expected method of use, this
 * object allows the HTML unescaping functionality to be used
 * as the foundation for a custom translator.</p>
 *
 * <p>Combines the {@code BASIC_UNESCAPE} and {@code ISO8859_1_UNESCAPE} entity tables
 * with a {@link NumericEntityUnescaper}.</p>
 *
 * @since 3.0
 */
public static final CharSequenceTranslator UNESCAPE_HTML3 =
    new AggregateTranslator(
        new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
        new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
        new NumericEntityUnescaper()
    );
364
/**
 * Translator object for unescaping escaped HTML 4.0.
 *
 * <p>While {@link #unescapeHtml4(String)} is the expected method of use, this
 * object allows the HTML unescaping functionality to be used
 * as the foundation for a custom translator.</p>
 *
 * <p>Combines the {@code BASIC_UNESCAPE}, {@code ISO8859_1_UNESCAPE} and
 * {@code HTML40_EXTENDED_UNESCAPE} entity tables with a {@link NumericEntityUnescaper}.</p>
 *
 * @since 3.0
 */
public static final CharSequenceTranslator UNESCAPE_HTML4 =
    new AggregateTranslator(
        new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
        new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
        new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()),
        new NumericEntityUnescaper()
    );
381
/**
 * Translator object for unescaping escaped XML.
 *
 * <p>While {@link #unescapeXml(String)} is the expected method of use, this
 * object allows the XML unescaping functionality to be used
 * as the foundation for a custom translator.</p>
 *
 * <p>Combines the {@code BASIC_UNESCAPE} and {@code APOS_UNESCAPE} entity tables
 * with a {@link NumericEntityUnescaper}.</p>
 *
 * @since 3.0
 */
public static final CharSequenceTranslator UNESCAPE_XML =
    new AggregateTranslator(
        new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
        new LookupTranslator(EntityArrays.APOS_UNESCAPE()),
        new NumericEntityUnescaper()
    );
397
/**
 * Translator object for unescaping escaped Comma Separated Value entries.
 *
 * <p>While {@link #unescapeCsv(String)} is the expected method of use, this
 * object allows the CSV unescaping functionality to be used
 * as the foundation for a custom translator.</p>
 *
 * @since 3.0
 */
public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper();
408
409 /* Helper functions */
410
/**
 * Returns a {@link String} value for a CSV column enclosed in double quotes,
 * if required.
 *
 * <p>If the value contains a comma, newline or double quote, then the
 * String value is returned enclosed in double quotes.</p>
 *
 * <p>Any double quote characters in the value are escaped with another double quote.</p>
 *
 * <p>If the value does not contain a comma, newline or double quote, then the
 * String value is returned unchanged.</p>
 *
 * <p>See <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
 * <a href="https://datatracker.ietf.org/doc/html/rfc4180">RFC 4180</a>.</p>
 *
 * <p>Delegates to {@link #ESCAPE_CSV}.</p>
 *
 * @param input the input CSV column String, may be null
 * @return the input String, enclosed in double quotes if the value contains a comma,
 * newline or double quote, {@code null} if null string input
 * @since 2.4
 */
public static final String escapeCsv(final String input) {
    return ESCAPE_CSV.translate(input);
}
434
/**
 * Escapes the characters in a {@link String} using EcmaScript String rules.
 *
 * <p>Escapes any values it finds into their EcmaScript String form.
 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.)</p>
 *
 * <p>So a tab becomes the characters {@code '\\'} and
 * {@code 't'}.</p>
 *
 * <p>The only difference between Java strings and EcmaScript strings
 * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p>
 *
 * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects.</p>
 *
 * <p>Example:</p>
 * <pre>
 * input string: He didn't say, "Stop!"
 * output string: He didn\'t say, \"Stop!\"
 * </pre>
 *
 * <p>Delegates to {@link #ESCAPE_ECMASCRIPT}.</p>
 *
 * @param input String to escape values in, may be null
 * @return String with escaped values, {@code null} if null string input
 * @since 3.0
 */
public static final String escapeEcmaScript(final String input) {
    return ESCAPE_ECMASCRIPT.translate(input);
}
461
/**
 * Escapes the characters in a {@link String} using HTML entities.
 *
 * <p>Supports only the HTML 3.0 entities.</p>
 *
 * <p>Delegates to {@link #ESCAPE_HTML3}.</p>
 *
 * @param input the {@link String} to escape, may be null
 * @return a new escaped {@link String}, {@code null} if null string input
 * @since 3.0
 */
public static final String escapeHtml3(final String input) {
    return ESCAPE_HTML3.translate(input);
}
473
/**
 * Escapes the characters in a {@link String} using HTML entities.
 *
 * <p>
 * For example:
 * </p>
 * <p>{@code "bread" & "butter"}</p>
 * becomes:
 * <p>
 * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
 * </p>
 *
 * <p>Supports all known HTML 4.0 entities, including funky accents.
 * Note that the commonly used apostrophe escape character ({@code &apos;})
 * is not a legal entity and so is not supported.</p>
 *
 * <p>Delegates to {@link #ESCAPE_HTML4}.</p>
 *
 * @param input the {@link String} to escape, may be null
 * @return a new escaped {@link String}, {@code null} if null string input
 * @see <a href="https://web.archive.org/web/20060225074150/https://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
 * @see <a href="https://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
 * @see <a href="https://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
 * @see <a href="https://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
 * @see <a href="https://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
 * @since 3.0
 */
public static final String escapeHtml4(final String input) {
    return ESCAPE_HTML4.translate(input);
}
502
/**
 * Escapes the characters in a {@link String} using Java String rules.
 *
 * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.)</p>
 *
 * <p>So a tab becomes the characters {@code '\\'} and
 * {@code 't'}.</p>
 *
 * <p>The only difference between Java strings and JavaScript strings
 * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p>
 *
 * <p>Example:</p>
 * <pre>
 * input string: He didn't say, "Stop!"
 * output string: He didn't say, \"Stop!\"
 * </pre>
 *
 * <p>Delegates to {@link #ESCAPE_JAVA}.</p>
 *
 * @param input String to escape values in, may be null
 * @return String with escaped values, {@code null} if null string input
 */
public static final String escapeJava(final String input) {
    return ESCAPE_JAVA.translate(input);
}
526
/**
 * Escapes the characters in a {@link String} using Json String rules.
 *
 * <p>Escapes any values it finds into their Json String form.
 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.)</p>
 *
 * <p>So a tab becomes the characters {@code '\\'} and
 * {@code 't'}.</p>
 *
 * <p>The only difference between Java strings and Json strings
 * is that in Json, forward-slash (/) is escaped.</p>
 *
 * <p>See <a href="https://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a> for further details.</p>
 *
 * <p>Example:</p>
 * <pre>
 * input string: He didn't say, "Stop!"
 * output string: He didn't say, \"Stop!\"
 * </pre>
 *
 * <p>Delegates to {@link #ESCAPE_JSON}.</p>
 *
 * @param input String to escape values in, may be null
 * @return String with escaped values, {@code null} if null string input
 * @since 3.2
 */
public static final String escapeJson(final String input) {
    return ESCAPE_JSON.translate(input);
}
553
/**
 * Escapes the characters in a {@link String} using XML entities.
 *
 * <p>For example: {@code "bread" & "butter"} =&gt;
 * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
 * </p>
 *
 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
 * Does not support DTDs or external entities.</p>
 *
 * <p>Note that Unicode characters greater than 0x7f are as of 3.0, no longer
 * escaped. If you still wish this functionality, you can achieve it
 * via the following:
 * {@code StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE));}</p>
 *
 * @param input the {@link String} to escape, may be null
 * @return a new escaped {@link String}, {@code null} if null string input
 * @see #unescapeXml(String)
 * @deprecated Use {@link #escapeXml10(java.lang.String)} or {@link #escapeXml11(java.lang.String)} instead.
 */
@Deprecated
public static final String escapeXml(final String input) {
    return ESCAPE_XML.translate(input);
}
578
/**
 * Escapes the characters in a {@link String} using XML entities.
 * <p>
 * For example:
 * </p>
 *
 * <pre>{@code
 * "bread" & "butter"
 * }</pre>
 * <p>
 * converts to:
 * </p>
 *
 * <pre>
 * {@code
 * &quot;bread&quot; &amp; &quot;butter&quot;
 * }
 * </pre>
 *
 * <p>
 * Note that XML 1.0 is a text-only format: it cannot represent control characters or unpaired Unicode surrogate code points, even after escaping. The
 * method {@code escapeXml10} will remove characters that do not fit in the following ranges:
 * </p>
 *
 * <p>
 * {@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}
 * </p>
 *
 * <p>
 * Though not strictly necessary, {@code escapeXml10} will escape characters in the following ranges:
 * </p>
 *
 * <p>
 * {@code [#x7F-#x84] | [#x86-#x9F]}
 * </p>
 *
 * <p>
 * The returned string can be inserted into a valid XML 1.0 or XML 1.1 document. If you want to allow more non-text characters in an XML 1.1 document, use
 * {@link #escapeXml11(String)}.
 * </p>
 *
 * @param input the {@link String} to escape, may be null
 * @return a new escaped {@link String}, {@code null} if null string input
 * @see #unescapeXml(String)
 * @since 3.3
 */
public static String escapeXml10(final String input) {
    return ESCAPE_XML10.translate(input);
}
628
/**
 * Escapes the characters in a {@link String} using XML entities.
 *
 * <p>For example: {@code "bread" & "butter"} =&gt;
 * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
 * </p>
 *
 * <p>XML 1.1 can represent certain control characters, but it cannot represent
 * the null byte or unpaired Unicode surrogate code points, even after escaping.
 * {@code escapeXml11} will remove characters that do not fit in the following
 * ranges:</p>
 *
 * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
 *
 * <p>{@code escapeXml11} will escape characters in the following ranges:</p>
 *
 * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p>
 *
 * <p>The returned string can be inserted into a valid XML 1.1 document. Do not
 * use it for XML 1.0 documents.</p>
 *
 * @param input the {@link String} to escape, may be null
 * @return a new escaped {@link String}, {@code null} if null string input
 * @see #unescapeXml(String)
 * @since 3.3
 */
public static String escapeXml11(final String input) {
    return ESCAPE_XML11.translate(input);
}
658
/**
 * Returns a {@link String} value for an unescaped CSV column.
 *
 * <p>If the value is enclosed in double quotes, and contains a comma, newline
 * or double quote, then quotes are removed.
 * </p>
 *
 * <p>Any double quote escaped characters (a pair of double quotes) are unescaped
 * to just one double quote.</p>
 *
 * <p>If the value is not enclosed in double quotes, or is and does not contain a
 * comma, newline or double quote, then the String value is returned unchanged.</p>
 *
 * <p>See <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
 * <a href="https://datatracker.ietf.org/doc/html/rfc4180">RFC 4180</a>.</p>
 *
 * <p>Delegates to {@link #UNESCAPE_CSV}.</p>
 *
 * @param input the input CSV column String, may be null
 * @return the input String, with enclosing double quotes removed and embedded double
 * quotes unescaped, {@code null} if null string input
 * @since 2.4
 */
public static final String unescapeCsv(final String input) {
    return UNESCAPE_CSV.translate(input);
}
683
/**
 * Unescapes any EcmaScript literals found in the {@link String}.
 *
 * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
 * into a newline character, unless the {@code '\'} is preceded by another
 * {@code '\'}.</p>
 *
 * <p>Delegates to {@link #UNESCAPE_ECMASCRIPT}.</p>
 *
 * @see #unescapeJava(String)
 * @param input the {@link String} to unescape, may be null
 * @return A new unescaped {@link String}, {@code null} if null string input
 * @since 3.0
 */
public static final String unescapeEcmaScript(final String input) {
    return UNESCAPE_ECMASCRIPT.translate(input);
}
699
/**
 * Unescapes a string containing entity escapes to a string
 * containing the actual Unicode characters corresponding to the
 * escapes. Supports only HTML 3.0 entities.
 *
 * <p>Delegates to {@link #UNESCAPE_HTML3}.</p>
 *
 * @param input the {@link String} to unescape, may be null
 * @return a new unescaped {@link String}, {@code null} if null string input
 * @since 3.0
 */
public static final String unescapeHtml3(final String input) {
    return UNESCAPE_HTML3.translate(input);
}
712
/**
 * Unescapes a string containing entity escapes to a string
 * containing the actual Unicode characters corresponding to the
 * escapes. Supports HTML 4.0 entities.
 *
 * <p>For example, the string {@code "&lt;Fran&ccedil;ais&gt;"}
 * will become {@code "<Français>"}</p>
 *
 * <p>If an entity is unrecognized, it is left alone, and inserted
 * verbatim into the result string. e.g. {@code "&gt;&zzzz;x"} will
 * become {@code ">&zzzz;x"}.</p>
 *
 * <p>Delegates to {@link #UNESCAPE_HTML4}.</p>
 *
 * @param input the {@link String} to unescape, may be null
 * @return a new unescaped {@link String}, {@code null} if null string input
 * @since 3.0
 */
public static final String unescapeHtml4(final String input) {
    return UNESCAPE_HTML4.translate(input);
}
732
/**
 * Unescapes any Java literals found in the {@link String}.
 *
 * <p>For example, it will turn a sequence of {@code '\'} and
 * {@code 'n'} into a newline character, unless the {@code '\'}
 * is preceded by another {@code '\'}.</p>
 *
 * <p>Delegates to {@link #UNESCAPE_JAVA}.</p>
 *
 * @param input the {@link String} to unescape, may be null
 * @return a new unescaped {@link String}, {@code null} if null string input
 */
public static final String unescapeJava(final String input) {
    return UNESCAPE_JAVA.translate(input);
}
745
/**
 * Unescapes any Json literals found in the {@link String}.
 *
 * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
 * into a newline character, unless the {@code '\'} is preceded by another
 * {@code '\'}.</p>
 *
 * <p>Delegates to {@link #UNESCAPE_JSON}.</p>
 *
 * @see #unescapeJava(String)
 * @param input the {@link String} to unescape, may be null
 * @return A new unescaped {@link String}, {@code null} if null string input
 * @since 3.2
 */
public static final String unescapeJson(final String input) {
    return UNESCAPE_JSON.translate(input);
}
761
/**
 * Unescapes a string containing XML entity escapes to a string
 * containing the actual Unicode characters corresponding to the
 * escapes.
 *
 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
 * Does not support DTDs or external entities.</p>
 *
 * <p>Note that numeric entities are also unescaped to their respective
 * Unicode characters, because {@link #UNESCAPE_XML} includes a
 * {@link NumericEntityUnescaper}. This may change in future releases.</p>
 *
 * @param input the {@link String} to unescape, may be null
 * @return a new unescaped {@link String}, {@code null} if null string input
 * @see #escapeXml(String)
 * @see #escapeXml10(String)
 * @see #escapeXml11(String)
 */
public static final String unescapeXml(final String input) {
    return UNESCAPE_XML.translate(input);
}
782
/**
 * {@link StringEscapeUtils} instances should NOT be constructed in
 * standard programming.
 *
 * <p>Instead, the class should be used through its static methods, for example:</p>
 * <pre>StringEscapeUtils.escapeJava("foo");</pre>
 *
 * <p>This constructor is public to permit tools that require a JavaBean
 * instance to operate.</p>
 *
 * @deprecated TODO Make private in 4.0.
 */
@Deprecated
public StringEscapeUtils() {
    // empty
}
799
800 }