/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17  package org.apache.commons.lang3;
18  
19  import static org.junit.jupiter.api.Assertions.assertEquals;
20  import static org.junit.jupiter.api.Assertions.assertFalse;
21  import static org.junit.jupiter.api.Assertions.assertNotNull;
22  import static org.junit.jupiter.api.Assertions.assertNull;
23  import static org.junit.jupiter.api.Assertions.assertThrows;
24  import static org.junit.jupiter.api.Assertions.assertTrue;
25  
26  import java.io.IOException;
27  import java.io.StringWriter;
28  import java.lang.reflect.Constructor;
29  import java.lang.reflect.Modifier;
30  import java.nio.charset.StandardCharsets;
31  import java.nio.file.Files;
32  import java.nio.file.Paths;
33  
34  import org.apache.commons.lang3.text.translate.CharSequenceTranslator;
35  import org.apache.commons.lang3.text.translate.NumericEntityEscaper;
36  import org.junit.jupiter.api.Test;
37  
38  /**
39   * Unit tests for {@link StringEscapeUtils}.
40   */
41  @Deprecated
42  public class StringEscapeUtilsTest extends AbstractLangTest {
43      private static final String FOO = "foo";
44  
45      @Test
46      public void testConstructor() {
47          assertNotNull(new StringEscapeUtils());
48          final Constructor<?>[] cons = StringEscapeUtils.class.getDeclaredConstructors();
49          assertEquals(1, cons.length);
50          assertTrue(Modifier.isPublic(cons[0].getModifiers()));
51          assertTrue(Modifier.isPublic(StringEscapeUtils.class.getModifiers()));
52          assertFalse(Modifier.isFinal(StringEscapeUtils.class.getModifiers()));
53      }
54  
55      @Test
56      public void testEscapeJava() throws IOException {
57          assertNull(StringEscapeUtils.escapeJava(null));
58          assertThrows(NullPointerException.class, () -> StringEscapeUtils.ESCAPE_JAVA.translate(null, null));
59          assertThrows(NullPointerException.class, () -> StringEscapeUtils.ESCAPE_JAVA.translate("", null));
60  
61          assertEscapeJava("empty string", "", "");
62          assertEscapeJava(FOO, FOO);
63          assertEscapeJava("tab", "\\t", "\t");
64          assertEscapeJava("backslash", "\\\\", "\\");
65          assertEscapeJava("single quote should not be escaped", "'", "'");
66          assertEscapeJava("\\\\\\b\\t\\r", "\\\b\t\r");
67          assertEscapeJava("\\u1234", "\u1234");
68          assertEscapeJava("\\u0234", "\u0234");
69          assertEscapeJava("\\u00EF", "\u00ef");
70          assertEscapeJava("\\u0001", "\u0001");
71          assertEscapeJava("Should use capitalized Unicode hex", "\\uABCD", "\uabcd");
72  
73          assertEscapeJava("He didn't say, \\\"stop!\\\"",
74                  "He didn't say, \"stop!\"");
75          assertEscapeJava("non-breaking space", "This space is non-breaking:" + "\\u00A0",
76                  "This space is non-breaking:\u00a0");
77          assertEscapeJava("\\uABCD\\u1234\\u012C",
78                  "\uABCD\u1234\u012C");
79      }
80  
81      /**
82       * Tests https://issues.apache.org/jira/browse/LANG-421
83       */
84      @Test
85      public void testEscapeJavaWithSlash() {
86          final String input = "String with a slash (/) in it";
87  
88          final String expected = input;
89          final String actual = StringEscapeUtils.escapeJava(input);
90  
91          /*
92           * In 2.4 StringEscapeUtils.escapeJava(String) escapes '/' characters, which are not a valid character to escape
93           * in a Java string.
94           */
95          assertEquals(expected, actual);
96      }
97  
98      private void assertEscapeJava(final String escaped, final String original) throws IOException {
99          assertEscapeJava(null, escaped, original);
100     }
101 
102     private void assertEscapeJava(String message, final String expected, final String original) throws IOException {
103         final String converted = StringEscapeUtils.escapeJava(original);
104         message = "escapeJava(String) failed" + (message == null ? "" : (": " + message));
105         assertEquals(expected, converted, message);
106 
107         final StringWriter writer = new StringWriter();
108         StringEscapeUtils.ESCAPE_JAVA.translate(original, writer);
109         assertEquals(expected, writer.toString());
110     }
111 
112     @Test
113     public void testUnescapeJava() throws IOException {
114         assertNull(StringEscapeUtils.unescapeJava(null));
115         assertThrows(NullPointerException.class, () -> StringEscapeUtils.UNESCAPE_JAVA.translate(null, null));
116         assertThrows(NullPointerException.class, () -> StringEscapeUtils.UNESCAPE_JAVA.translate("", null));
117         assertThrows(RuntimeException.class, () -> StringEscapeUtils.unescapeJava("\\u02-3"));
118 
119         assertUnescapeJava("", "");
120         assertUnescapeJava("test", "test");
121         assertUnescapeJava("\ntest\b", "\\ntest\\b");
122         assertUnescapeJava("\u123425foo\ntest\b", "\\u123425foo\\ntest\\b");
123         assertUnescapeJava("'\foo\teste\r", "\\'\\foo\\teste\\r");
124         assertUnescapeJava("", "\\");
125         //foo
126         assertUnescapeJava("lowercase Unicode", "\uABCDx", "\\uabcdx");
127         assertUnescapeJava("uppercase Unicode", "\uABCDx", "\\uABCDx");
128         assertUnescapeJava("Unicode as final character", "\uABCD", "\\uabcd");
129     }
130 
131     private void assertUnescapeJava(final String unescaped, final String original) throws IOException {
132         assertUnescapeJava(null, unescaped, original);
133     }
134 
135     private void assertUnescapeJava(final String message, final String unescaped, final String original) throws IOException {
136         final String expected = unescaped;
137         final String actual = StringEscapeUtils.unescapeJava(original);
138 
139         assertEquals(expected, actual,
140                 "unescape(String) failed" +
141                 (message == null ? "" : (": " + message)) +
142                 ": expected '" + StringEscapeUtils.escapeJava(expected) +
143                 // we escape this so we can see it in the error message
144                 "' actual '" + StringEscapeUtils.escapeJava(actual) + "'");
145 
146         final StringWriter writer = new StringWriter();
147         StringEscapeUtils.UNESCAPE_JAVA.translate(original, writer);
148         assertEquals(unescaped, writer.toString());
149 
150     }
151 
152     @Test
153     public void testEscapeEcmaScript() {
154         assertNull(StringEscapeUtils.escapeEcmaScript(null));
155         assertThrows(NullPointerException.class, () -> StringEscapeUtils.ESCAPE_ECMASCRIPT.translate(null, null));
156         assertThrows(NullPointerException.class, () -> StringEscapeUtils.ESCAPE_ECMASCRIPT.translate("", null));
157 
158         assertEquals("He didn\\'t say, \\\"stop!\\\"", StringEscapeUtils.escapeEcmaScript("He didn't say, \"stop!\""));
159         assertEquals("document.getElementById(\\\"test\\\").value = \\'<script>alert(\\'aaa\\');<\\/script>\\';",
160                 StringEscapeUtils.escapeEcmaScript("document.getElementById(\"test\").value = '<script>alert('aaa');</script>';"));
161     }
162 
163     @Test
164     public void testUnescapeEcmaScript() {
165         assertNull(StringEscapeUtils.escapeEcmaScript(null));
166         assertThrows(NullPointerException.class, () -> StringEscapeUtils.UNESCAPE_ECMASCRIPT.translate(null, null));
167         assertThrows(NullPointerException.class, () -> StringEscapeUtils.UNESCAPE_ECMASCRIPT.translate("", null));
168 
169         assertEquals("He didn't say, \"stop!\"", StringEscapeUtils.unescapeEcmaScript("He didn\\'t say, \\\"stop!\\\""));
170         assertEquals("document.getElementById(\"test\").value = '<script>alert('aaa');</script>';",
171                 StringEscapeUtils.unescapeEcmaScript("document.getElementById(\\\"test\\\").value = \\'<script>alert(\\'aaa\\');<\\/script>\\';"));
172     }
173 
174 
175     /** HTML and XML */
176     private static final String[][] HTML_ESCAPES = {
177         {"no escaping", "plain text", "plain text"},
178         {"no escaping", "plain text", "plain text"},
179         {"empty string", "", ""},
180         {"null", null, null},
181         {"ampersand", "bread &amp; butter", "bread & butter"},
182         {"quotes", "&quot;bread&quot; &amp; butter", "\"bread\" & butter"},
183         {"final character only", "greater than &gt;", "greater than >"},
184         {"first character only", "&lt; less than", "< less than"},
185         {"apostrophe", "Huntington's chorea", "Huntington's chorea"},
186         {"languages", "English,Fran&ccedil;ais,\u65E5\u672C\u8A9E (nihongo)", "English,Fran\u00E7ais,\u65E5\u672C\u8A9E (nihongo)"},
187         {"8-bit ascii shouldn't number-escape", "\u0080\u009F", "\u0080\u009F"},
188     };
189 
190     @Test
191     public void testEscapeHtml() throws IOException {
192         for (final String[] element : HTML_ESCAPES) {
193             final String message = element[0];
194             final String expected = element[1];
195             final String original = element[2];
196             assertEquals(expected, StringEscapeUtils.escapeHtml4(original), message);
197             final StringWriter sw = new StringWriter();
198             StringEscapeUtils.ESCAPE_HTML4.translate(original, sw);
199             final String actual = original == null ? null : sw.toString();
200             assertEquals(expected, actual, message);
201         }
202     }
203 
204     @Test
205     public void testUnescapeHtml4() throws IOException {
206         for (final String[] element : HTML_ESCAPES) {
207             final String message = element[0];
208             final String expected = element[2];
209             final String original = element[1];
210             assertEquals(expected, StringEscapeUtils.unescapeHtml4(original), message);
211 
212             final StringWriter sw = new StringWriter();
213             StringEscapeUtils.UNESCAPE_HTML4.translate(original, sw);
214             final String actual = original == null ? null : sw.toString();
215             assertEquals(expected, actual, message);
216         }
217         // \u00E7 is a cedilla (c with wiggle under)
218         // note that the test string must be 7-bit-clean (Unicode escaped) or else it will compile incorrectly
219         // on some locales
220         assertEquals("Fran\u00E7ais", StringEscapeUtils.unescapeHtml4("Fran\u00E7ais"), "funny chars pass through OK");
221 
222         assertEquals("Hello&;World", StringEscapeUtils.unescapeHtml4("Hello&;World"));
223         assertEquals("Hello&#;World", StringEscapeUtils.unescapeHtml4("Hello&#;World"));
224         assertEquals("Hello&# ;World", StringEscapeUtils.unescapeHtml4("Hello&# ;World"));
225         assertEquals("Hello&##;World", StringEscapeUtils.unescapeHtml4("Hello&##;World"));
226     }
227 
228     @Test
229     public void testUnescapeHexCharsHtml() {
230         // Simple easy to grok test
231         assertEquals("\u0080\u009F", StringEscapeUtils.unescapeHtml4("&#x80;&#x9F;"), "hex number unescape");
232         assertEquals("\u0080\u009F", StringEscapeUtils.unescapeHtml4("&#X80;&#X9F;"), "hex number unescape");
233         // Test all Character values:
234         for (char i = Character.MIN_VALUE; i < Character.MAX_VALUE; i++) {
235             final Character c1 = Character.valueOf(i);
236             final Character c2 = Character.valueOf((char) (i+1));
237             final String expected = c1.toString() + c2.toString();
238             final String escapedC1 = "&#x" + Integer.toHexString((c1.charValue())) + ";";
239             final String escapedC2 = "&#x" + Integer.toHexString((c2.charValue())) + ";";
240             assertEquals(expected, StringEscapeUtils.unescapeHtml4(escapedC1 + escapedC2), "hex number unescape index " + (int) i);
241         }
242     }
243 
244     @Test
245     public void testUnescapeUnknownEntity() {
246         assertEquals("&zzzz;", StringEscapeUtils.unescapeHtml4("&zzzz;"));
247     }
248 
249     @Test
250     public void testEscapeHtmlVersions() {
251         assertEquals("&Beta;", StringEscapeUtils.escapeHtml4("\u0392"));
252         assertEquals("\u0392", StringEscapeUtils.unescapeHtml4("&Beta;"));
253 
254         // TODO: refine API for escaping/unescaping specific HTML versions
255     }
256 
257     @Test
258     public void testEscapeXml() throws Exception {
259         assertEquals("&lt;abc&gt;", StringEscapeUtils.escapeXml("<abc>"));
260         assertEquals("<abc>", StringEscapeUtils.unescapeXml("&lt;abc&gt;"));
261 
262         assertEquals("\u00A1", StringEscapeUtils.escapeXml("\u00A1"), "XML should not escape >0x7f values");
263         assertEquals("\u00A0", StringEscapeUtils.unescapeXml("&#160;"), "XML should be able to unescape >0x7f values");
264         assertEquals("\u00A0", StringEscapeUtils.unescapeXml("&#0160;"),
265                 "XML should be able to unescape >0x7f values with one leading 0");
266         assertEquals("\u00A0", StringEscapeUtils.unescapeXml("&#00160;"),
267                 "XML should be able to unescape >0x7f values with two leading 0s");
268         assertEquals("\u00A0", StringEscapeUtils.unescapeXml("&#000160;"),
269                 "XML should be able to unescape >0x7f values with three leading 0s");
270 
271         assertEquals("ain't", StringEscapeUtils.unescapeXml("ain&apos;t"));
272         assertEquals("ain&apos;t", StringEscapeUtils.escapeXml("ain't"));
273         assertEquals("", StringEscapeUtils.escapeXml(""));
274         assertNull(StringEscapeUtils.escapeXml(null));
275         assertNull(StringEscapeUtils.unescapeXml(null));
276 
277         StringWriter sw = new StringWriter();
278         StringEscapeUtils.ESCAPE_XML.translate("<abc>", sw);
279         assertEquals("&lt;abc&gt;", sw.toString(), "XML was escaped incorrectly");
280 
281         sw = new StringWriter();
282         StringEscapeUtils.UNESCAPE_XML.translate("&lt;abc&gt;", sw);
283         assertEquals("<abc>", sw.toString(), "XML was unescaped incorrectly");
284     }
285 
286     @Test
287     public void testEscapeXml10() {
288         assertEquals("a&lt;b&gt;c&quot;d&apos;e&amp;f", StringEscapeUtils.escapeXml10("a<b>c\"d'e&f"));
289         assertEquals("a\tb\rc\nd", StringEscapeUtils.escapeXml10("a\tb\rc\nd"), "XML 1.0 should not escape \t \n \r");
290         assertEquals("ab", StringEscapeUtils.escapeXml10("a\u0000\u0001\u0008\u000b\u000c\u000e\u001fb"),
291                 "XML 1.0 should omit most #x0-x8 | #xb | #xc | #xe-#x19");
292         assertEquals("a\ud7ff  \ue000b", StringEscapeUtils.escapeXml10("a\ud7ff\ud800 \udfff \ue000b"),
293                 "XML 1.0 should omit #xd800-#xdfff");
294         assertEquals("a\ufffdb", StringEscapeUtils.escapeXml10("a\ufffd\ufffe\uffffb"),
295                 "XML 1.0 should omit #xfffe | #xffff");
296         assertEquals("a\u007e&#127;&#132;\u0085&#134;&#159;\u00a0b",
297                 StringEscapeUtils.escapeXml10("a\u007e\u007f\u0084\u0085\u0086\u009f\u00a0b"),
298                 "XML 1.0 should escape #x7f-#x84 | #x86 - #x9f, for XML 1.1 compatibility");
299     }
300 
301     @Test
302     public void testEscapeXml11() {
303         assertEquals("a&lt;b&gt;c&quot;d&apos;e&amp;f", StringEscapeUtils.escapeXml11("a<b>c\"d'e&f"));
304         assertEquals("a\tb\rc\nd", StringEscapeUtils.escapeXml11("a\tb\rc\nd"), "XML 1.1 should not escape \t \n \r");
305         assertEquals("ab", StringEscapeUtils.escapeXml11("a\u0000b"), "XML 1.1 should omit #x0");
306         assertEquals("a&#1;&#8;&#11;&#12;&#14;&#31;b",
307                 StringEscapeUtils.escapeXml11("a\u0001\u0008\u000b\u000c\u000e\u001fb"),
308                 "XML 1.1 should escape #x1-x8 | #xb | #xc | #xe-#x19");
309         assertEquals("a\u007e&#127;&#132;\u0085&#134;&#159;\u00a0b",
310                 StringEscapeUtils.escapeXml11("a\u007e\u007f\u0084\u0085\u0086\u009f\u00a0b"),
311                 "XML 1.1 should escape #x7F-#x84 | #x86-#x9F");
312         assertEquals("a\ud7ff  \ue000b", StringEscapeUtils.escapeXml11("a\ud7ff\ud800 \udfff \ue000b"),
313                 "XML 1.1 should omit #xd800-#xdfff");
314         assertEquals("a\ufffdb", StringEscapeUtils.escapeXml11("a\ufffd\ufffe\uffffb"),
315                 "XML 1.1 should omit #xfffe | #xffff");
316     }
317 
318     /**
319      * Tests Supplementary characters.
320      * <p>
321      * From https://www.w3.org/International/questions/qa-escapes
322      * </p>
323      * <blockquote>
324      * Supplementary characters are those Unicode characters that have code points higher than the characters in
325      * the Basic Multilingual Plane (BMP). In UTF-16 a supplementary character is encoded using two 16-bit surrogate code points from the
326      * BMP. Because of this, some people think that supplementary characters need to be represented using two escapes, but this is incorrect
327      * - you must use the single, code point value for that character. For example, use &amp;&#35;x233B4&#59; rather than
328      * &amp;&#35;xD84C&#59;&amp;&#35;xDFB4&#59;.
329      * </blockquote>
330      * @see <a href="https://www.w3.org/International/questions/qa-escapes">Using character escapes in markup and CSS</a>
331      * @see <a href="https://issues.apache.org/jira/browse/LANG-728">LANG-728</a>
332      */
333     @Test
334     public void testEscapeXmlSupplementaryCharacters() {
335         final CharSequenceTranslator escapeXml =
336             StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE) );
337 
338         assertEquals("&#144308;", escapeXml.translate("\uD84C\uDFB4"),
339                 "Supplementary character must be represented using a single escape");
340 
341         assertEquals("a b c &#144308;", escapeXml.translate("a b c \uD84C\uDFB4"),
342                 "Supplementary characters mixed with basic characters should be encoded correctly");
343     }
344 
345     @Test
346     public void testEscapeXmlAllCharacters() {
347         // https://www.w3.org/TR/xml/#charsets says:
348         // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] /* any Unicode character,
349         // excluding the surrogate blocks, FFFE, and FFFF. */
350         final CharSequenceTranslator escapeXml = StringEscapeUtils.ESCAPE_XML
351                 .with(NumericEntityEscaper.below(9), NumericEntityEscaper.between(0xB, 0xC), NumericEntityEscaper.between(0xE, 0x19),
352                         NumericEntityEscaper.between(0xD800, 0xDFFF), NumericEntityEscaper.between(0xFFFE, 0xFFFF), NumericEntityEscaper.above(0x110000));
353 
354         assertEquals("&#0;&#1;&#2;&#3;&#4;&#5;&#6;&#7;&#8;", escapeXml.translate("\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008"));
355         assertEquals("\t", escapeXml.translate("\t")); // 0x9
356         assertEquals("\n", escapeXml.translate("\n")); // 0xA
357         assertEquals("&#11;&#12;", escapeXml.translate("\u000B\u000C"));
358         assertEquals("\r", escapeXml.translate("\r")); // 0xD
359         assertEquals("Hello World! Ain&apos;t this great?", escapeXml.translate("Hello World! Ain't this great?"));
360         assertEquals("&#14;&#15;&#24;&#25;", escapeXml.translate("\u000E\u000F\u0018\u0019"));
361     }
362 
363     /**
364      * Reverse of the above.
365      *
366      * @see <a href="https://issues.apache.org/jira/browse/LANG-729">LANG-729</a>
367      */
368     @Test
369     public void testUnescapeXmlSupplementaryCharacters() {
370         assertEquals("\uD84C\uDFB4", StringEscapeUtils.unescapeXml("&#144308;"),
371                 "Supplementary character must be represented using a single escape");
372 
373         assertEquals("a b c \uD84C\uDFB4", StringEscapeUtils.unescapeXml("a b c &#144308;"),
374                 "Supplementary characters mixed with basic characters should be decoded correctly");
375     }
376 
377     // Tests issue LANG-150
378     // https://issues.apache.org/jira/browse/LANG-150
379     @Test
380     public void testStandaloneAmphersand() {
381         assertEquals("<P&O>", StringEscapeUtils.unescapeHtml4("&lt;P&O&gt;"));
382         assertEquals("test & <", StringEscapeUtils.unescapeHtml4("test & &lt;"));
383         assertEquals("<P&O>", StringEscapeUtils.unescapeXml("&lt;P&O&gt;"));
384         assertEquals("test & <", StringEscapeUtils.unescapeXml("test & &lt;"));
385     }
386 
387     @Test
388     public void testLang313() {
389         assertEquals("& &", StringEscapeUtils.unescapeHtml4("& &amp;"));
390     }
391 
392     @Test
393     public void testEscapeCsvString() {
394         assertEquals("foo.bar",            StringEscapeUtils.escapeCsv("foo.bar"));
395         assertEquals("\"foo,bar\"",        StringEscapeUtils.escapeCsv("foo,bar"));
396         assertEquals("\"foo\nbar\"",       StringEscapeUtils.escapeCsv("foo\nbar"));
397         assertEquals("\"foo\rbar\"",       StringEscapeUtils.escapeCsv("foo\rbar"));
398         assertEquals("\"foo\"\"bar\"",     StringEscapeUtils.escapeCsv("foo\"bar"));
399         assertEquals("foo\uD84C\uDFB4bar", StringEscapeUtils.escapeCsv("foo\uD84C\uDFB4bar"));
400         assertEquals("",   StringEscapeUtils.escapeCsv(""));
401         assertNull(StringEscapeUtils.escapeCsv(null));
402     }
403 
404     @Test
405     public void testEscapeCsvWriter() throws Exception {
406         checkCsvEscapeWriter("foo.bar",            "foo.bar");
407         checkCsvEscapeWriter("\"foo,bar\"",        "foo,bar");
408         checkCsvEscapeWriter("\"foo\nbar\"",       "foo\nbar");
409         checkCsvEscapeWriter("\"foo\rbar\"",       "foo\rbar");
410         checkCsvEscapeWriter("\"foo\"\"bar\"",     "foo\"bar");
411         checkCsvEscapeWriter("foo\uD84C\uDFB4bar", "foo\uD84C\uDFB4bar");
412         checkCsvEscapeWriter("", null);
413         checkCsvEscapeWriter("", "");
414     }
415 
416     private void checkCsvEscapeWriter(final String expected, final String value) throws IOException {
417         final StringWriter writer = new StringWriter();
418         StringEscapeUtils.ESCAPE_CSV.translate(value, writer);
419         assertEquals(expected, writer.toString());
420     }
421 
422     @Test
423     public void testEscapeCsvIllegalStateException() {
424         final StringWriter writer = new StringWriter();
425         assertThrows(IllegalStateException.class, () -> StringEscapeUtils.ESCAPE_CSV.translate("foo", -1, writer));
426     }
427 
428     @Test
429     public void testUnescapeCsvString() {
430         assertEquals("foo.bar",              StringEscapeUtils.unescapeCsv("foo.bar"));
431         assertEquals("foo,bar",              StringEscapeUtils.unescapeCsv("\"foo,bar\""));
432         assertEquals("foo\nbar",             StringEscapeUtils.unescapeCsv("\"foo\nbar\""));
433         assertEquals("foo\rbar",             StringEscapeUtils.unescapeCsv("\"foo\rbar\""));
434         assertEquals("foo\"bar",             StringEscapeUtils.unescapeCsv("\"foo\"\"bar\""));
435         assertEquals("foo\uD84C\uDFB4bar",   StringEscapeUtils.unescapeCsv("foo\uD84C\uDFB4bar"));
436         assertEquals("",   StringEscapeUtils.unescapeCsv(""));
437         assertNull(StringEscapeUtils.unescapeCsv(null));
438 
439         assertEquals("\"foo.bar\"",          StringEscapeUtils.unescapeCsv("\"foo.bar\""));
440     }
441 
442     @Test
443     public void testUnescapeCsvWriter() throws Exception {
444         checkCsvUnescapeWriter("foo.bar",            "foo.bar");
445         checkCsvUnescapeWriter("foo,bar",            "\"foo,bar\"");
446         checkCsvUnescapeWriter("foo\nbar",           "\"foo\nbar\"");
447         checkCsvUnescapeWriter("foo\rbar",           "\"foo\rbar\"");
448         checkCsvUnescapeWriter("foo\"bar",           "\"foo\"\"bar\"");
449         checkCsvUnescapeWriter("foo\uD84C\uDFB4bar", "foo\uD84C\uDFB4bar");
450         checkCsvUnescapeWriter("", null);
451         checkCsvUnescapeWriter("", "");
452 
453         checkCsvUnescapeWriter("\"foo.bar\"",        "\"foo.bar\"");
454     }
455 
456     private void checkCsvUnescapeWriter(final String expected, final String value) throws IOException {
457         final StringWriter writer = new StringWriter();
458         StringEscapeUtils.UNESCAPE_CSV.translate(value, writer);
459         assertEquals(expected, writer.toString());
460     }
461 
462     @Test
463     public void testUnescapeCsvIllegalStateException() {
464         final StringWriter writer = new StringWriter();
465         assertThrows(IllegalStateException.class, () -> StringEscapeUtils.UNESCAPE_CSV.translate("foo", -1, writer));
466     }
467 
468     /**
469      * Tests // https://issues.apache.org/jira/browse/LANG-480
470      */
471     @Test
472     public void testEscapeHtmlHighUnicode() {
473         // this is the utf8 representation of the character:
474         // COUNTING ROD UNIT DIGIT THREE
475         // in Unicode
476         // code point: U+1D362
477         final byte[] data = { (byte) 0xF0, (byte) 0x9D, (byte) 0x8D, (byte) 0xA2 };
478 
479         final String original = new String(data, StandardCharsets.UTF_8);
480 
481         final String escaped = StringEscapeUtils.escapeHtml4( original );
482         assertEquals(original, escaped, "High Unicode should not have been escaped");
483 
484         final String unescaped = StringEscapeUtils.unescapeHtml4( escaped );
485         assertEquals(original, unescaped, "High Unicode should have been unchanged");
486 
487 // TODO: I think this should hold, needs further investigation
488 //        String unescapedFromEntity = StringEscapeUtils.unescapeHtml4( "&#119650;" );
489 //        assertEquals( "High Unicode should have been unescaped", original, unescapedFromEntity);
490     }
491 
492     /**
493      * Tests https://issues.apache.org/jira/browse/LANG-339
494      */
495     @Test
496     public void testEscapeHiragana() {
497         // Some random Japanese Unicode characters
498         final String original = "\u304B\u304C\u3068";
499         final String escaped = StringEscapeUtils.escapeHtml4(original);
500         assertEquals(original, escaped,
501                 "Hiragana character Unicode behavior should not be being escaped by escapeHtml4");
502 
503         final String unescaped = StringEscapeUtils.unescapeHtml4( escaped );
504 
505         assertEquals(escaped, unescaped, "Hiragana character Unicode behavior has changed - expected no unescaping");
506     }
507 
508     /**
509      * Tests https://issues.apache.org/jira/browse/LANG-708
510      *
511      * @throws IOException
512      *             if an I/O error occurs
513      */
514     @Test
515     public void testLang708() throws IOException {
516         final byte[] inputBytes = Files.readAllBytes(Paths.get("src/test/resources/lang-708-input.txt"));
517         final String input = new String(inputBytes, StandardCharsets.UTF_8);
518         final String escaped = StringEscapeUtils.escapeEcmaScript(input);
519         // just the end:
520         assertTrue(escaped.endsWith("}]"), escaped);
521         // a little more:
522         assertTrue(escaped.endsWith("\"valueCode\\\":\\\"\\\"}]"), escaped);
523     }
524 
525     /**
526      * Tests https://issues.apache.org/jira/browse/LANG-720
527      */
528     @Test
529     public void testLang720() {
530         final String input = "\ud842\udfb7" + "A";
531         final String escaped = StringEscapeUtils.escapeXml(input);
532         assertEquals(input, escaped);
533     }
534 
535     /**
536      * Tests https://issues.apache.org/jira/browse/LANG-911
537      */
538     @Test
539     public void testLang911() {
540         final String bellsTest = "\ud83d\udc80\ud83d\udd14";
541         final String value = StringEscapeUtils.escapeJava(bellsTest);
542         final String valueTest = StringEscapeUtils.unescapeJava(value);
543         assertEquals(bellsTest, valueTest);
544     }
545 
546     @Test
547     public void testEscapeJson() {
548         assertNull(StringEscapeUtils.escapeJson(null));
549         assertThrows(NullPointerException.class, () -> StringEscapeUtils.ESCAPE_JSON.translate(null, null));
550         assertThrows(NullPointerException.class, () -> StringEscapeUtils.ESCAPE_JSON.translate("", null));
551 
552         assertEquals("He didn't say, \\\"stop!\\\"", StringEscapeUtils.escapeJson("He didn't say, \"stop!\""));
553 
554         final String expected = "\\\"foo\\\" isn't \\\"bar\\\". specials: \\b\\r\\n\\f\\t\\\\\\/";
555         final String input ="\"foo\" isn't \"bar\". specials: \b\r\n\f\t\\/";
556 
557         assertEquals(expected, StringEscapeUtils.escapeJson(input));
558     }
559 
560     @Test
561     public void testUnescapeJson() {
562         assertNull(StringEscapeUtils.unescapeJson(null));
563         assertThrows(NullPointerException.class, () -> StringEscapeUtils.UNESCAPE_JSON.translate(null, null));
564         assertThrows(NullPointerException.class, () -> StringEscapeUtils.UNESCAPE_JSON.translate("", null));
565 
566         assertEquals("He didn't say, \"stop!\"", StringEscapeUtils.unescapeJson("He didn't say, \\\"stop!\\\""));
567 
568         final String expected ="\"foo\" isn't \"bar\". specials: \b\r\n\f\t\\/";
569         final String input = "\\\"foo\\\" isn't \\\"bar\\\". specials: \\b\\r\\n\\f\\t\\\\\\/";
570 
571         assertEquals(expected, StringEscapeUtils.unescapeJson(input));
572     }
573 }