1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.text;
18
19 import java.io.IOException;
20 import java.io.Writer;
21 import java.util.Collections;
22 import java.util.HashMap;
23 import java.util.Map;
24
25 import org.apache.commons.lang3.StringUtils;
26 import org.apache.commons.text.translate.AggregateTranslator;
27 import org.apache.commons.text.translate.CharSequenceTranslator;
28 import org.apache.commons.text.translate.CsvTranslators;
29 import org.apache.commons.text.translate.EntityArrays;
30 import org.apache.commons.text.translate.JavaUnicodeEscaper;
31 import org.apache.commons.text.translate.LookupTranslator;
32 import org.apache.commons.text.translate.NumericEntityEscaper;
33 import org.apache.commons.text.translate.NumericEntityUnescaper;
34 import org.apache.commons.text.translate.OctalUnescaper;
35 import org.apache.commons.text.translate.UnicodeUnescaper;
36 import org.apache.commons.text.translate.UnicodeUnpairedSurrogateRemover;
37
38 /**
39 * <p>
 * Escapes and unescapes {@code String}s for Java, JavaScript (EcmaScript), JSON, HTML, XML, CSV and XSI shell syntax.
41 * </p>
42 *
43 * <p>
44 * #ThreadSafe#
45 * </p>
46 *
47 * <p>
48 * This code has been adapted from Apache Commons Lang 3.5.
49 * </p>
50 *
51 * @since 1.0
52 */
53 public class StringEscapeUtils {
54
55 /* ESCAPE TRANSLATORS */
56
57 /**
58 * Convenience wrapper for {@link StringBuilder} providing escape methods.
59 *
60 * <p>Example:</p>
61 * <pre>
62 * new Builder(ESCAPE_HTML4)
63 * .append("<p>")
64 * .escape("This is paragraph 1 and special chars like & get escaped.")
65 * .append("</p><p>")
66 * .escape("This is paragraph 2 & more...")
67 * .append("</p>")
68 * .toString()
69 * </pre>
70 */
71 public static final class Builder {
72
73 /**
74 * StringBuilder to be used in the Builder class.
75 */
76 private final StringBuilder sb;
77
78 /**
79 * CharSequenceTranslator to be used in the Builder class.
80 */
81 private final CharSequenceTranslator translator;
82
83 /**
84 * Builder constructor.
85 *
86 * @param translator a CharSequenceTranslator.
87 */
88 private Builder(final CharSequenceTranslator translator) {
89 this.sb = new StringBuilder();
90 this.translator = translator;
91 }
92
93 /**
94 * Literal append, no escaping being done.
95 *
96 * @param input the String to append
97 * @return {@code this}, to enable chaining
98 */
99 public Builder append(final String input) {
100 sb.append(input);
101 return this;
102 }
103
104 /**
105 * Escape {@code input} according to the given {@link CharSequenceTranslator}.
106 *
107 * @param input the String to escape
108 * @return {@code this}, to enable chaining
109 */
110 public Builder escape(final String input) {
111 sb.append(translator.translate(input));
112 return this;
113 }
114
115 /**
116 * Return the escaped string.
117 *
118 * @return The escaped string
119 */
120 @Override
121 public String toString() {
122 return sb.toString();
123 }
124 }
125 /**
126 * Translator object for unescaping backslash escaped entries.
127 */
128 static class XsiUnescaper extends CharSequenceTranslator {
129
130 /**
131 * Escaped backslash constant.
132 */
133 private static final char BACKSLASH = '\\';
134
135 @Override
136 public int translate(final CharSequence input, final int index, final Writer writer) throws IOException {
137
138 if (index != 0) {
139 throw new IllegalStateException("XsiUnescaper should never reach the [1] index");
140 }
141
142 final String s = input.toString();
143
144 int segmentStart = 0;
145 int searchOffset = 0;
146 while (true) {
147 final int pos = s.indexOf(BACKSLASH, searchOffset);
148 if (pos == -1) {
149 if (segmentStart < s.length()) {
150 writer.write(s.substring(segmentStart));
151 }
152 break;
153 }
154 if (pos > segmentStart) {
155 writer.write(s.substring(segmentStart, pos));
156 }
157 segmentStart = pos + 1;
158 searchOffset = pos + 2;
159 }
160
161 return Character.codePointCount(input, 0, input.length());
162 }
163 }
164
/**
 * Translator object for escaping Java.
 *
 * <p>Combines, in order: a lookup that backslash-escapes the double quote and the backslash, the
 * {@link EntityArrays#JAVA_CTRL_CHARS_ESCAPE} lookup, and a {@link JavaUnicodeEscaper} created with
 * {@code outsideOf(32, 0x7f)}.</p>
 *
 * <p>While {@link #escapeJava(String)} is the expected method of use, this object allows the Java escaping
 * functionality to be used as the foundation for a custom translator.</p>
 */
public static final CharSequenceTranslator ESCAPE_JAVA;
static {
    final Map<CharSequence, CharSequence> quoteAndBackslash = new HashMap<>();
    quoteAndBackslash.put("\"", "\\\"");
    quoteAndBackslash.put("\\", "\\\\");

    final CharSequenceTranslator specials = new LookupTranslator(Collections.unmodifiableMap(quoteAndBackslash));
    final CharSequenceTranslator controls = new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE);
    final CharSequenceTranslator unicode = JavaUnicodeEscaper.outsideOf(32, 0x7f);
    ESCAPE_JAVA = new AggregateTranslator(specials, controls, unicode);
}
182
/**
 * Translator object for escaping EcmaScript/JavaScript.
 *
 * <p>Combines, in order: a lookup that backslash-escapes the single quote, double quote, backslash and
 * forward slash, the {@link EntityArrays#JAVA_CTRL_CHARS_ESCAPE} lookup, and a {@link JavaUnicodeEscaper}
 * created with {@code outsideOf(32, 0x7f)}.</p>
 *
 * <p>While {@link #escapeEcmaScript(String)} is the expected method of use, this object allows the EcmaScript
 * escaping functionality to be used as the foundation for a custom translator.</p>
 */
public static final CharSequenceTranslator ESCAPE_ECMASCRIPT;
static {
    final Map<CharSequence, CharSequence> scriptSpecials = new HashMap<>();
    scriptSpecials.put("'", "\\'");
    scriptSpecials.put("\"", "\\\"");
    scriptSpecials.put("\\", "\\\\");
    scriptSpecials.put("/", "\\/");

    final CharSequenceTranslator specials = new LookupTranslator(Collections.unmodifiableMap(scriptSpecials));
    final CharSequenceTranslator controls = new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE);
    final CharSequenceTranslator unicode = JavaUnicodeEscaper.outsideOf(32, 0x7f);
    ESCAPE_ECMASCRIPT = new AggregateTranslator(specials, controls, unicode);
}
202
/**
 * Translator object for escaping Json.
 *
 * <p>Combines, in order: a lookup that backslash-escapes the double quote, backslash and forward slash, the
 * {@link EntityArrays#JAVA_CTRL_CHARS_ESCAPE} lookup, and a {@link JavaUnicodeEscaper} created with
 * {@code outsideOf(32, 0x7e)} (note the upper bound is {@code 0x7e}, not the {@code 0x7f} used for Java
 * and EcmaScript).</p>
 *
 * <p>While {@link #escapeJson(String)} is the expected method of use, this object allows the Json escaping
 * functionality to be used as the foundation for a custom translator.</p>
 */
public static final CharSequenceTranslator ESCAPE_JSON;
static {
    final Map<CharSequence, CharSequence> jsonSpecials = new HashMap<>();
    jsonSpecials.put("\"", "\\\"");
    jsonSpecials.put("\\", "\\\\");
    jsonSpecials.put("/", "\\/");

    final CharSequenceTranslator specials = new LookupTranslator(Collections.unmodifiableMap(jsonSpecials));
    final CharSequenceTranslator controls = new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE);
    final CharSequenceTranslator unicode = JavaUnicodeEscaper.outsideOf(32, 0x7e);
    ESCAPE_JSON = new AggregateTranslator(specials, controls, unicode);
}
221
/**
 * Translator object for escaping XML 1.0.
 *
 * <p>Combines, in order: the {@link EntityArrays#BASIC_ESCAPE} and {@link EntityArrays#APOS_ESCAPE} lookups,
 * a lookup that deletes the characters XML 1.0 cannot represent (U+0000-U+001F except TAB, LF and CR, plus
 * U+FFFE and U+FFFF), numeric-entity escapers for {@code [0x7f-0x84]} and {@code [0x86-0x9f]}, and a
 * {@link UnicodeUnpairedSurrogateRemover}.</p>
 *
 * <p>While {@link #escapeXml10(String)} is the expected method of use, this object allows the XML escaping
 * functionality to be used as the foundation for a custom translator.</p>
 */
public static final CharSequenceTranslator ESCAPE_XML10;
static {
    final Map<CharSequence, CharSequence> removedChars = new HashMap<>();
    // Every C0 control is mapped to the empty string except TAB (0x09), LF (0x0a) and CR (0x0d).
    for (int codePoint = 0x00; codePoint <= 0x1f; codePoint++) {
        final boolean keptWhitespace = codePoint == 0x09 || codePoint == 0x0a || codePoint == 0x0d;
        if (!keptWhitespace) {
            removedChars.put(String.valueOf((char) codePoint), StringUtils.EMPTY);
        }
    }
    removedChars.put("\ufffe", StringUtils.EMPTY);
    removedChars.put("\uffff", StringUtils.EMPTY);

    final CharSequenceTranslator basic = new LookupTranslator(EntityArrays.BASIC_ESCAPE);
    final CharSequenceTranslator apostrophe = new LookupTranslator(EntityArrays.APOS_ESCAPE);
    final CharSequenceTranslator remover = new LookupTranslator(Collections.unmodifiableMap(removedChars));
    final CharSequenceTranslator lowerC1 = NumericEntityEscaper.between(0x7f, 0x84);
    final CharSequenceTranslator upperC1 = NumericEntityEscaper.between(0x86, 0x9f);
    final CharSequenceTranslator surrogates = new UnicodeUnpairedSurrogateRemover();
    ESCAPE_XML10 = new AggregateTranslator(basic, apostrophe, remover, lowerC1, upperC1, surrogates);
}
271
/**
 * Translator object for escaping XML 1.1.
 *
 * <p>Combines, in order: the {@link EntityArrays#BASIC_ESCAPE} and {@link EntityArrays#APOS_ESCAPE} lookups,
 * a lookup that deletes U+0000, U+FFFE and U+FFFF and rewrites U+000B and U+000C as the numeric character
 * references {@code &#11;} and {@code &#12;}, numeric-entity escapers for {@code [0x1-0x8]},
 * {@code [0xe-0x1f]}, {@code [0x7f-0x84]} and {@code [0x86-0x9f]}, and a
 * {@link UnicodeUnpairedSurrogateRemover}.</p>
 *
 * <p>While {@link #escapeXml11(String)} is the expected method of use, this
 * object allows the XML escaping functionality to be used
 * as the foundation for a custom translator.</p>
 */
public static final CharSequenceTranslator ESCAPE_XML11;

static {
    final Map<CharSequence, CharSequence> escapeXml11Map = new HashMap<>();
    escapeXml11Map.put("\u0000", StringUtils.EMPTY);
    // U+000B and U+000C are escaped, not removed: escapeXml11 documents [#xB-#xC] as an escaped range.
    escapeXml11Map.put("\u000b", "&#11;");
    escapeXml11Map.put("\u000c", "&#12;");
    escapeXml11Map.put("\ufffe", StringUtils.EMPTY);
    escapeXml11Map.put("\uffff", StringUtils.EMPTY);
    ESCAPE_XML11 = new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_ESCAPE),
            new LookupTranslator(EntityArrays.APOS_ESCAPE),
            new LookupTranslator(Collections.unmodifiableMap(escapeXml11Map)),
            NumericEntityEscaper.between(0x1, 0x8),
            NumericEntityEscaper.between(0xe, 0x1f),
            NumericEntityEscaper.between(0x7f, 0x84),
            NumericEntityEscaper.between(0x86, 0x9f),
            new UnicodeUnpairedSurrogateRemover()
    );
}
299
/**
 * Translator object for escaping HTML version 3.0.
 *
 * <p>Applies the {@link EntityArrays#BASIC_ESCAPE} lookup followed by the
 * {@link EntityArrays#ISO8859_1_ESCAPE} lookup.</p>
 *
 * <p>While {@link #escapeHtml3(String)} is the expected method of use, this
 * object allows the HTML escaping functionality to be used
 * as the foundation for a custom translator.</p>
 */
public static final CharSequenceTranslator ESCAPE_HTML3;
static {
    final CharSequenceTranslator basic = new LookupTranslator(EntityArrays.BASIC_ESCAPE);
    final CharSequenceTranslator latin1 = new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE);
    ESCAPE_HTML3 = new AggregateTranslator(basic, latin1);
}
312
/**
 * Translator object for escaping HTML version 4.0.
 *
 * <p>Applies the {@link EntityArrays#BASIC_ESCAPE}, {@link EntityArrays#ISO8859_1_ESCAPE} and
 * {@link EntityArrays#HTML40_EXTENDED_ESCAPE} lookups, in that order.</p>
 *
 * <p>While {@link #escapeHtml4(String)} is the expected method of use, this
 * object allows the HTML escaping functionality to be used
 * as the foundation for a custom translator.</p>
 */
public static final CharSequenceTranslator ESCAPE_HTML4;
static {
    final CharSequenceTranslator basic = new LookupTranslator(EntityArrays.BASIC_ESCAPE);
    final CharSequenceTranslator latin1 = new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE);
    final CharSequenceTranslator extended = new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE);
    ESCAPE_HTML4 = new AggregateTranslator(basic, latin1, extended);
}
/**
 * Translator object for escaping individual Comma Separated Values.
 *
 * <p>This is a {@link CsvTranslators.CsvEscaper} instance.</p>
 *
 * <p>While {@link #escapeCsv(String)} is the expected method of use, this
 * object allows the CSV escaping functionality to be used
 * as the foundation for a custom translator.</p>
 */
public static final CharSequenceTranslator ESCAPE_CSV;
static {
    ESCAPE_CSV = new CsvTranslators.CsvEscaper();
}
334
/* XSI ESCAPE TRANSLATOR, FOLLOWED BY THE UNESCAPE TRANSLATORS */
336
/**
 * Translator object for escaping Shell command language.
 *
 * <p>Each shell metacharacter handled here (including space and tab) is prefixed with a backslash;
 * line breaks ({@code "\r\n"} and {@code "\n"}) are removed.</p>
 *
 * @see <a href="https://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a>
 */
public static final CharSequenceTranslator ESCAPE_XSI;
static {
    final Map<CharSequence, CharSequence> shellEscapes = new HashMap<>();
    // Characters that are escaped by putting a single backslash in front of them.
    final String backslashPrefixed = "|&;<>()$`\\\"' \t*?[#~=%";
    for (final char metaChar : backslashPrefixed.toCharArray()) {
        shellEscapes.put(String.valueOf(metaChar), "\\" + metaChar);
    }
    // Line breaks are dropped rather than escaped.
    shellEscapes.put("\r\n", StringUtils.EMPTY);
    shellEscapes.put("\n", StringUtils.EMPTY);
    ESCAPE_XSI = new LookupTranslator(Collections.unmodifiableMap(shellEscapes));
}
372
/**
 * Translator object for unescaping escaped Java.
 *
 * <p>Combines, in order: an {@link OctalUnescaper}, a {@link UnicodeUnescaper}, the
 * {@link EntityArrays#JAVA_CTRL_CHARS_UNESCAPE} lookup, and a lookup that unescapes an escaped backslash,
 * double quote and single quote and maps a lone backslash to the empty string.</p>
 *
 * <p>While {@link #unescapeJava(String)} is the expected method of use, this
 * object allows the Java unescaping functionality to be used
 * as the foundation for a custom translator.</p>
 */
public static final CharSequenceTranslator UNESCAPE_JAVA;

static {
    final Map<CharSequence, CharSequence> backslashSequences = new HashMap<>();
    backslashSequences.put("\\\\", "\\");
    backslashSequences.put("\\\"", "\"");
    backslashSequences.put("\\'", "'");
    // A backslash on its own is removed (presumably only when none of the longer keys match).
    backslashSequences.put("\\", StringUtils.EMPTY);

    final CharSequenceTranslator octal = new OctalUnescaper(); // .between('\1', '\377'),
    final CharSequenceTranslator unicode = new UnicodeUnescaper();
    final CharSequenceTranslator controls = new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE);
    final CharSequenceTranslator specials = new LookupTranslator(Collections.unmodifiableMap(backslashSequences));
    UNESCAPE_JAVA = new AggregateTranslator(octal, unicode, controls, specials);
}
395
/**
 * Translator object for unescaping escaped EcmaScript.
 *
 * <p>This is the very same object as {@link #UNESCAPE_JAVA}.</p>
 *
 * <p>While {@link #unescapeEcmaScript(String)} is the expected method of use, this
 * object allows the EcmaScript unescaping functionality to be used
 * as the foundation for a custom translator.</p>
 */
public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT;
static {
    UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA;
}
404
/**
 * Translator object for unescaping escaped Json.
 *
 * <p>This is the very same object as {@link #UNESCAPE_JAVA}.</p>
 *
 * <p>While {@link #unescapeJson(String)} is the expected method of use, this
 * object allows the Json unescaping functionality to be used
 * as the foundation for a custom translator.</p>
 */
public static final CharSequenceTranslator UNESCAPE_JSON;
static {
    UNESCAPE_JSON = UNESCAPE_JAVA;
}
413
/**
 * Translator object for unescaping escaped HTML 3.0.
 *
 * <p>Applies the {@link EntityArrays#BASIC_UNESCAPE} and {@link EntityArrays#ISO8859_1_UNESCAPE} lookups,
 * then a {@link NumericEntityUnescaper}.</p>
 *
 * <p>While {@link #unescapeHtml3(String)} is the expected method of use, this
 * object allows the HTML unescaping functionality to be used
 * as the foundation for a custom translator.</p>
 */
public static final CharSequenceTranslator UNESCAPE_HTML3;
static {
    final CharSequenceTranslator basic = new LookupTranslator(EntityArrays.BASIC_UNESCAPE);
    final CharSequenceTranslator latin1 = new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE);
    final CharSequenceTranslator numeric = new NumericEntityUnescaper();
    UNESCAPE_HTML3 = new AggregateTranslator(basic, latin1, numeric);
}
427
/**
 * Translator object for unescaping escaped HTML 4.0.
 *
 * <p>Applies the {@link EntityArrays#BASIC_UNESCAPE}, {@link EntityArrays#ISO8859_1_UNESCAPE} and
 * {@link EntityArrays#HTML40_EXTENDED_UNESCAPE} lookups, then a {@link NumericEntityUnescaper}.</p>
 *
 * <p>While {@link #unescapeHtml4(String)} is the expected method of use, this
 * object allows the HTML unescaping functionality to be used
 * as the foundation for a custom translator.</p>
 */
public static final CharSequenceTranslator UNESCAPE_HTML4;
static {
    final CharSequenceTranslator basic = new LookupTranslator(EntityArrays.BASIC_UNESCAPE);
    final CharSequenceTranslator latin1 = new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE);
    final CharSequenceTranslator extended = new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE);
    final CharSequenceTranslator numeric = new NumericEntityUnescaper();
    UNESCAPE_HTML4 = new AggregateTranslator(basic, latin1, extended, numeric);
}
442
/**
 * Translator object for unescaping escaped XML.
 *
 * <p>Applies the {@link EntityArrays#BASIC_UNESCAPE} and {@link EntityArrays#APOS_UNESCAPE} lookups,
 * then a {@link NumericEntityUnescaper}.</p>
 *
 * <p>While {@link #unescapeXml(String)} is the expected method of use, this
 * object allows the XML unescaping functionality to be used
 * as the foundation for a custom translator.</p>
 */
public static final CharSequenceTranslator UNESCAPE_XML;
static {
    final CharSequenceTranslator basic = new LookupTranslator(EntityArrays.BASIC_UNESCAPE);
    final CharSequenceTranslator apostrophe = new LookupTranslator(EntityArrays.APOS_UNESCAPE);
    final CharSequenceTranslator numeric = new NumericEntityUnescaper();
    UNESCAPE_XML = new AggregateTranslator(basic, apostrophe, numeric);
}
456
/**
 * Translator object for unescaping escaped Comma Separated Value entries.
 *
 * <p>This is a {@link CsvTranslators.CsvUnescaper} instance.</p>
 *
 * <p>While {@link #unescapeCsv(String)} is the expected method of use, this
 * object allows the CSV unescaping functionality to be used
 * as the foundation for a custom translator.</p>
 */
public static final CharSequenceTranslator UNESCAPE_CSV;
static {
    UNESCAPE_CSV = new CsvTranslators.CsvUnescaper();
}
465
/* XSI unescape translator, followed by the helper functions */
467
/**
 * Translator object for unescaping escaped XSI Value entries.
 *
 * <p>This is an {@link XsiUnescaper} instance.</p>
 *
 * <p>While {@link #unescapeXSI(String)} is the expected method of use, this
 * object allows the XSI unescaping functionality to be used
 * as the foundation for a custom translator.</p>
 */
public static final CharSequenceTranslator UNESCAPE_XSI;
static {
    UNESCAPE_XSI = new XsiUnescaper();
}
476
477 /**
478 * Gets a {@link Builder}.
479 *
480 * @param translator the text translator.
481 * @return {@link Builder}
482 */
483 public static StringEscapeUtils.Builder builder(final CharSequenceTranslator translator) {
484 return new Builder(translator);
485 }
486
487 /**
488 * Returns a {@code String} value for a CSV column enclosed in double quotes, if required.
489 *
490 * <p>
491 * If the value contains a comma, newline or double quote, then the String value is returned enclosed in double quotes.
492 * </p>
493 *
494 * <p>
495 * Any double quote characters in the value are escaped with another double quote.
496 * </p>
497 *
498 * <p>
499 * If the value does not contain a comma, newline or double quote, then the String value is returned unchanged.
500 * </p>
501 * <p>
502 * See <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and <a href="https://tools.ietf.org/html/rfc4180">RFC 4180</a>.
503 * </p>
504 *
505 * @param input the input CSV column String, may be null.
506 * @return The input String, enclosed in double quotes if the value contains a comma, newline or double quote, {@code null} if null string input.
507 */
508 public static String escapeCsv(final String input) {
509 return ESCAPE_CSV.translate(input);
510 }
511
512 /**
513 * Escapes the characters in a {@code String} using EcmaScript String rules.
514 *
515 * <p>
516 * Escapes any values it finds into their EcmaScript String form. Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.)
517 * </p>
518 *
519 * <p>
520 * So a tab becomes the characters {@code '\\'} and {@code 't'}.
521 * </p>
522 *
523 * <p>
524 * The only difference between Java strings and EcmaScript strings is that in EcmaScript, a single quote and forward-slash (/) are escaped.
525 * </p>
526 *
527 * <p>
528 * Note that EcmaScript is best known by the JavaScript and ActionScript dialects.
529 * </p>
530 *
531 * <p>
532 * Example:
533 * </p>
534 *
535 * <pre>
536 * input string: He didn't say, "Stop!"
537 * output string: He didn\'t say, \"Stop!\"
538 * </pre>
539 * <p>
540 * <strong>Security Note.</strong> We only provide backslash escaping in this method. For example, {@code '\"'} has the output {@code '\\\"'} which could
541 * result in potential issues in the case where the string being escaped is being used in an HTML tag like {@code <select onmouseover="..." />}. If you wish
542 * to have more rigorous string escaping, you may consider the <a href="https://www.owasp.org/index.php/Category:OWASP_Enterprise_Security_API_JAVA">ESAPI
543 * Libraries</a>. Further, you can view the <a href="https://github.com/esapi">ESAPI GitHub Org</a>.
544 * </p>
545 *
546 * @param input String to escape values in, may be null.
547 * @return String with escaped values, {@code null} if null string input.
548 */
549 public static String escapeEcmaScript(final String input) {
550 return ESCAPE_ECMASCRIPT.translate(input);
551 }
552
553 /**
554 * Escapes the characters in a {@code String} using HTML entities.
555 *
556 * <p>Supports only the HTML 3.0 entities.</p>
557 *
558 * @param input the {@code String} to escape, may be null.
559 * @return a new escaped {@code String}, {@code null} if null string input.
560 */
561 public static String escapeHtml3(final String input) {
562 return ESCAPE_HTML3.translate(input);
563 }
564
565 // HTML and XML
566 /**
567 * Escapes the characters in a {@code String} using HTML entities.
568 *
569 * <p>
570 * For example:
571 * </p>
572 * <p>{@code "bread" & "butter"}</p>
573 * becomes:
574 * <p>
575 * {@code "bread" &amp; "butter"}.
576 * </p>
577 *
578 * <p>Supports all known HTML 4.0 entities, including funky accents.
579 * Note that the commonly used apostrophe escape character (&apos;)
580 * is not a legal entity and so is not supported).</p>
581 *
582 * @param input the {@code String} to escape, may be null.
583 * @return a new escaped {@code String}, {@code null} if null string input.
584 * @see <a href="https://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
585 * @see <a href="https://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
586 * @see <a href="https://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
587 * @see <a href="https://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
588 * @see <a href="https://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
589 */
590 public static String escapeHtml4(final String input) {
591 return ESCAPE_HTML4.translate(input);
592 }
593
594 // Java and JavaScript
595 /**
596 * Escapes the characters in a {@code String} using Java String rules.
597 *
598 * <p>
599 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.)
600 * </p>
601 *
602 * <p>
603 * So a tab becomes the characters {@code '\\'} and {@code 't'}.
604 * </p>
605 *
606 * <p>
607 * The only difference between Java strings and JavaScript strings is that in JavaScript, a single quote and forward-slash (/) are escaped.
608 * </p>
609 *
610 * <p>
611 * Example:
612 * </p>
613 *
614 * <pre>
615 * input string: He didn't say, "Stop!"
616 * output string: He didn't say, \"Stop!\"
617 * </pre>
618 *
619 * @param input String to escape values in, may be null.
620 * @return String with escaped values, {@code null} if null string input.
621 */
622 public static String escapeJava(final String input) {
623 return ESCAPE_JAVA.translate(input);
624 }
625
626 /**
627 * Escapes the characters in a {@code String} using Json String rules.
628 *
629 * <p>
630 * Escapes any values it finds into their Json String form. Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.)
631 * </p>
632 *
633 * <p>
634 * So a tab becomes the characters {@code '\\'} and {@code 't'}.
635 * </p>
636 *
637 * <p>
638 * The only difference between Java strings and Json strings is that in Json, forward-slash (/) is escaped.
639 * </p>
640 *
641 * <p>
642 * See http://www.ietf.org/rfc/rfc4627.txt for further details.
643 * </p>
644 *
645 * <p>
646 * Example:
647 * </p>
648 *
649 * <pre>
650 * input string: He didn't say, "Stop!"
651 * output string: He didn't say, \"Stop!\"
652 * </pre>
653 *
654 * @param input String to escape values in, may be null.
655 * @return String with escaped values, {@code null} if null string input.
656 */
657 public static String escapeJson(final String input) {
658 return ESCAPE_JSON.translate(input);
659 }
660
661 /**
662 * Escapes the characters in a {@code String} using XML entities.
663 *
664 * <p>
665 * For example: {@code "bread" & "butter"} => {@code "bread" & "butter"}.
666 * </p>
667 *
668 * <p>
669 * Note that XML 1.0 is a text-only format: it cannot represent control characters or unpaired Unicode surrogate code points, even after escaping.
670 * {@code escapeXml10} will remove characters that do not fit in the following ranges:
671 * </p>
672 *
673 * <p>
674 * {@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}
675 * </p>
676 *
677 * <p>
678 * Though not strictly necessary, {@code escapeXml10} will escape characters in the following ranges:
679 * </p>
680 *
681 * <p>
682 * {@code [#x7F-#x84] | [#x86-#x9F]}
683 * </p>
684 *
685 * <p>
686 * The returned string can be inserted into a valid XML 1.0 or XML 1.1 document. If you want to allow more non-text characters in an XML 1.1 document, use
687 * {@link #escapeXml11(String)}.
688 * </p>
689 *
690 * @param input the {@code String} to escape, may be null.
691 * @return a new escaped {@code String}, {@code null} if null string input.
692 * @see #unescapeXml(String)
693 */
694 public static String escapeXml10(final String input) {
695 return ESCAPE_XML10.translate(input);
696 }
697
698 /**
699 * Escapes the characters in a {@code String} using XML entities.
700 *
701 * <p>
702 * For example: {@code "bread" & "butter"} => {@code "bread" & "butter"}.
703 * </p>
704 *
705 * <p>
706 * XML 1.1 can represent certain control characters, but it cannot represent the null byte or unpaired Unicode surrogate code points, even after escaping.
707 * {@code escapeXml11} will remove characters that do not fit in the following ranges:
708 * </p>
709 *
710 * <p>
711 * {@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}
712 * </p>
713 *
714 * <p>
715 * {@code escapeXml11} will escape characters in the following ranges:
716 * </p>
717 *
718 * <p>
719 * {@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}
720 * </p>
721 *
722 * <p>
723 * The returned string can be inserted into a valid XML 1.1 document. Do not use it for XML 1.0 documents.
724 * </p>
725 *
726 * @param input the {@code String} to escape, may be null.
727 * @return a new escaped {@code String}, {@code null} if null string input.
728 * @see #unescapeXml(String)
729 */
730 public static String escapeXml11(final String input) {
731 return ESCAPE_XML11.translate(input);
732 }
733
734 /**
735 * Escapes the characters in a {@code String} using XSI rules.
736 *
737 * <p>
738 * <strong>Beware!</strong> In most cases you don't want to escape shell commands but use multi-argument methods provided by {@link ProcessBuilder} or
739 * {@link Runtime#exec(String[])} instead.
740 * </p>
741 *
742 * <p>
743 * Example:
744 * </p>
745 *
746 * <pre>
747 * input string: He didn't say, "Stop!"
748 * output string: He\ didn\'t\ say,\ \"Stop!\"
749 * </pre>
750 *
751 * @param input String to escape values in, may be null.
752 * @return String with escaped values, {@code null} if null string input.
753 * @see <a href="https://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a>
754 */
755 public static String escapeXSI(final String input) {
756 return ESCAPE_XSI.translate(input);
757 }
758
759 /**
760 * Returns a {@code String} value for an unescaped CSV column.
761 *
762 * <p>
763 * If the value is enclosed in double quotes, and contains a comma, newline or double quote, then quotes are removed.
764 * </p>
765 *
766 * <p>
767 * Any double quote escaped characters (a pair of double quotes) are unescaped to just one double quote.
768 * </p>
769 *
770 * <p>
771 * If the value is not enclosed in double quotes, or is and does not contain a comma, newline or double quote, then the String value is returned unchanged.
772 * </p>
773 *
774 * see <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and <a href="https://tools.ietf.org/html/rfc4180">RFC 4180</a>.
775 *
776 * @param input the input CSV column String, may be null.
777 * @return The input String, with enclosing double quotes removed and embedded double quotes unescaped, {@code null} if null string input.
778 */
779 public static String unescapeCsv(final String input) {
780 return UNESCAPE_CSV.translate(input);
781 }
782
783 /**
784 * Unescapes any EcmaScript literals found in the {@code String}.
785 *
786 * <p>
787 * For example, it will turn a sequence of {@code '\'} and {@code 'n'} into a newline character, unless the {@code '\'} is preceded by another {@code '\'}.
788 * </p>
789 *
790 * @param input the {@code String} to unescape, may be null.
791 * @return A new unescaped {@code String}, {@code null} if null string input.
792 * @see #unescapeJava(String)
793 */
794 public static String unescapeEcmaScript(final String input) {
795 return UNESCAPE_ECMASCRIPT.translate(input);
796 }
797
798 /**
799 * Unescapes a string containing entity escapes to a string containing the actual Unicode characters corresponding to the escapes. Supports only HTML 3.0
800 * entities.
801 *
802 * @param input the {@code String} to unescape, may be null.
803 * @return a new unescaped {@code String}, {@code null} if null string input.
804 */
805 public static String unescapeHtml3(final String input) {
806 return UNESCAPE_HTML3.translate(input);
807 }
808
809 /**
810 * Unescapes a string containing entity escapes to a string containing the actual Unicode characters corresponding to the escapes. Supports HTML 4.0
811 * entities.
812 *
813 * <p>
814 * For example, the string {@code "<Français>"} will become {@code "<Fran�ais>"}
815 * </p>
816 *
817 * <p>
818 * If an entity is unrecognized, it is left alone, and inserted verbatim into the result string. e.g. {@code ">&zzzz;x"} will become {@code ">&zzzz;x"}.
819 * </p>
820 *
821 * @param input the {@code String} to unescape, may be null.
822 * @return a new unescaped {@code String}, {@code null} if null string input.
823 */
824 public static String unescapeHtml4(final String input) {
825 return UNESCAPE_HTML4.translate(input);
826 }
827
828 /**
829 * Unescapes any Java literals found in the {@code String}. For example, it will turn a sequence of {@code '\'} and {@code 'n'} into a newline character,
830 * unless the {@code '\'} is preceded by another {@code '\'}.
831 *
832 * @param input the {@code String} to unescape, may be null.
833 * @return a new unescaped {@code String}, {@code null} if null string input.
834 */
835 public static String unescapeJava(final String input) {
836 return UNESCAPE_JAVA.translate(input);
837 }
838
839 /**
840 * Unescapes any Json literals found in the {@code String}.
841 *
842 * <p>
843 * For example, it will turn a sequence of {@code '\'} and {@code 'n'} into a newline character, unless the {@code '\'} is preceded by another {@code '\'}.
844 * </p>
845 *
846 * @param input the {@code String} to unescape, may be null.
847 * @return A new unescaped {@code String}, {@code null} if null string input.
848 * @see #unescapeJava(String)
849 */
850 public static String unescapeJson(final String input) {
851 return UNESCAPE_JSON.translate(input);
852 }
853
854 /**
855 * Unescapes a string containing XML entity escapes to a string containing the actual Unicode characters corresponding to the escapes.
856 *
857 * <p>
858 * Supports only the five basic XML entities (gt, lt, quot, amp, apos). Does not support DTDs or external entities.
859 * </p>
860 *
861 * <p>
862 * Note that numerical \\u Unicode codes are unescaped to their respective Unicode characters. This may change in future releases.
863 * </p>
864 *
865 * @param input the {@code String} to unescape, may be null.
866 * @return a new unescaped {@code String}, {@code null} if null string input.
867 * @see #escapeXml10(String)
868 * @see #escapeXml11(String)
869 */
870 public static String unescapeXml(final String input) {
871 return UNESCAPE_XML.translate(input);
872 }
873
874 /**
875 * Unescapes the characters in a {@code String} using XSI rules.
876 *
877 * @param input the {@code String} to unescape, may be null.
878 * @return a new unescaped {@code String}, {@code null} if null string input.
879 * @see StringEscapeUtils#escapeXSI(String)
880 */
881 public static String unescapeXSI(final String input) {
882 return UNESCAPE_XSI.translate(input);
883 }
884
/**
 * Creates an instance. {@code StringEscapeUtils} instances should NOT be constructed in standard
 * programming; every operation is available through static members.
 *
 * <p>
 * Instead, the class should be used as:
 * </p>
 *
 * <pre>
 * StringEscapeUtils.escapeJava("foo");
 * </pre>
 *
 * <p>
 * This constructor is public to permit tools that require a JavaBean instance to operate.
 * </p>
 */
public StringEscapeUtils() {
    super();
}
902
903 }