View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.lang3;
18  
19  import static org.junit.Assert.assertEquals;
20  import static org.junit.Assert.assertFalse;
21  import static org.junit.Assert.assertNotNull;
22  import static org.junit.Assert.assertTrue;
23  import static org.junit.Assert.fail;
24  
25  import java.io.IOException;
26  import java.io.StringWriter;
27  import java.lang.reflect.Constructor;
28  import java.lang.reflect.Modifier;
29  import java.nio.charset.Charset;
30  import java.nio.charset.StandardCharsets;
31  import java.nio.file.Files;
32  import java.nio.file.Paths;
33  
34  import org.apache.commons.lang3.text.translate.CharSequenceTranslator;
35  import org.apache.commons.lang3.text.translate.NumericEntityEscaper;
36  import org.junit.Test;
37  
38  /**
39   * Unit tests for {@link StringEscapeUtils}.
40   */
41  @Deprecated
42  public class StringEscapeUtilsTest {
43      private static final String FOO = "foo";
44  
45      @Test
46      public void testConstructor() {
47          assertNotNull(new StringEscapeUtils());
48          final Constructor<?>[] cons = StringEscapeUtils.class.getDeclaredConstructors();
49          assertEquals(1, cons.length);
50          assertTrue(Modifier.isPublic(cons[0].getModifiers()));
51          assertTrue(Modifier.isPublic(StringEscapeUtils.class.getModifiers()));
52          assertFalse(Modifier.isFinal(StringEscapeUtils.class.getModifiers()));
53      }
54  
55      @Test
56      public void testEscapeJava() throws IOException {
57          assertEquals(null, StringEscapeUtils.escapeJava(null));
58          try {
59              StringEscapeUtils.ESCAPE_JAVA.translate(null, null);
60              fail();
61          } catch (final IOException ex) {
62              fail();
63          } catch (final IllegalArgumentException ex) {
64          }
65          try {
66              StringEscapeUtils.ESCAPE_JAVA.translate("", null);
67              fail();
68          } catch (final IOException ex) {
69              fail();
70          } catch (final IllegalArgumentException ex) {
71          }
72  
73          assertEscapeJava("empty string", "", "");
74          assertEscapeJava(FOO, FOO);
75          assertEscapeJava("tab", "\\t", "\t");
76          assertEscapeJava("backslash", "\\\\", "\\");
77          assertEscapeJava("single quote should not be escaped", "'", "'");
78          assertEscapeJava("\\\\\\b\\t\\r", "\\\b\t\r");
79          assertEscapeJava("\\u1234", "\u1234");
80          assertEscapeJava("\\u0234", "\u0234");
81          assertEscapeJava("\\u00EF", "\u00ef");
82          assertEscapeJava("\\u0001", "\u0001");
83          assertEscapeJava("Should use capitalized Unicode hex", "\\uABCD", "\uabcd");
84  
85          assertEscapeJava("He didn't say, \\\"stop!\\\"",
86                  "He didn't say, \"stop!\"");
87          assertEscapeJava("non-breaking space", "This space is non-breaking:" + "\\u00A0",
88                  "This space is non-breaking:\u00a0");
89          assertEscapeJava("\\uABCD\\u1234\\u012C",
90                  "\uABCD\u1234\u012C");
91      }
92  
93      /**
94       * Tests https://issues.apache.org/jira/browse/LANG-421
95       */
96      @Test
97      public void testEscapeJavaWithSlash() {
98          final String input = "String with a slash (/) in it";
99  
100         final String expected = input;
101         final String actual = StringEscapeUtils.escapeJava(input);
102 
103         /**
104          * In 2.4 StringEscapeUtils.escapeJava(String) escapes '/' characters, which are not a valid character to escape
105          * in a Java string.
106          */
107         assertEquals(expected, actual);
108     }
109 
110     private void assertEscapeJava(final String escaped, final String original) throws IOException {
111         assertEscapeJava(null, escaped, original);
112     }
113 
114     private void assertEscapeJava(String message, final String expected, final String original) throws IOException {
115         final String converted = StringEscapeUtils.escapeJava(original);
116         message = "escapeJava(String) failed" + (message == null ? "" : (": " + message));
117         assertEquals(message, expected, converted);
118 
119         final StringWriter writer = new StringWriter();
120         StringEscapeUtils.ESCAPE_JAVA.translate(original, writer);
121         assertEquals(expected, writer.toString());
122     }
123 
124     @Test
125     public void testUnescapeJava() throws IOException {
126         assertEquals(null, StringEscapeUtils.unescapeJava(null));
127         try {
128             StringEscapeUtils.UNESCAPE_JAVA.translate(null, null);
129             fail();
130         } catch (final IOException ex) {
131             fail();
132         } catch (final IllegalArgumentException ex) {
133         }
134         try {
135             StringEscapeUtils.UNESCAPE_JAVA.translate("", null);
136             fail();
137         } catch (final IOException ex) {
138             fail();
139         } catch (final IllegalArgumentException ex) {
140         }
141         try {
142             StringEscapeUtils.unescapeJava("\\u02-3");
143             fail();
144         } catch (final RuntimeException ex) {
145         }
146 
147         assertUnescapeJava("", "");
148         assertUnescapeJava("test", "test");
149         assertUnescapeJava("\ntest\b", "\\ntest\\b");
150         assertUnescapeJava("\u123425foo\ntest\b", "\\u123425foo\\ntest\\b");
151         assertUnescapeJava("'\foo\teste\r", "\\'\\foo\\teste\\r");
152         assertUnescapeJava("", "\\");
153         //foo
154         assertUnescapeJava("lowercase Unicode", "\uABCDx", "\\uabcdx");
155         assertUnescapeJava("uppercase Unicode", "\uABCDx", "\\uABCDx");
156         assertUnescapeJava("Unicode as final character", "\uABCD", "\\uabcd");
157     }
158 
159     private void assertUnescapeJava(final String unescaped, final String original) throws IOException {
160         assertUnescapeJava(null, unescaped, original);
161     }
162 
163     private void assertUnescapeJava(final String message, final String unescaped, final String original) throws IOException {
164         final String expected = unescaped;
165         final String actual = StringEscapeUtils.unescapeJava(original);
166 
167         assertEquals("unescape(String) failed" +
168                 (message == null ? "" : (": " + message)) +
169                 ": expected '" + StringEscapeUtils.escapeJava(expected) +
170                 // we escape this so we can see it in the error message
171                 "' actual '" + StringEscapeUtils.escapeJava(actual) + "'",
172                 expected, actual);
173 
174         final StringWriter writer = new StringWriter();
175         StringEscapeUtils.UNESCAPE_JAVA.translate(original, writer);
176         assertEquals(unescaped, writer.toString());
177 
178     }
179 
180     @Test
181     public void testEscapeEcmaScript() {
182         assertEquals(null, StringEscapeUtils.escapeEcmaScript(null));
183         try {
184             StringEscapeUtils.ESCAPE_ECMASCRIPT.translate(null, null);
185             fail();
186         } catch (final IOException ex) {
187             fail();
188         } catch (final IllegalArgumentException ex) {
189         }
190         try {
191             StringEscapeUtils.ESCAPE_ECMASCRIPT.translate("", null);
192             fail();
193         } catch (final IOException ex) {
194             fail();
195         } catch (final IllegalArgumentException ex) {
196         }
197 
198         assertEquals("He didn\\'t say, \\\"stop!\\\"", StringEscapeUtils.escapeEcmaScript("He didn't say, \"stop!\""));
199         assertEquals("document.getElementById(\\\"test\\\").value = \\'<script>alert(\\'aaa\\');<\\/script>\\';",
200                 StringEscapeUtils.escapeEcmaScript("document.getElementById(\"test\").value = '<script>alert('aaa');</script>';"));
201     }
202 
203     @Test
204     public void testUnescapeEcmaScript() {
205         assertEquals(null, StringEscapeUtils.escapeEcmaScript(null));
206         try {
207             StringEscapeUtils.UNESCAPE_ECMASCRIPT.translate(null, null);
208             fail();
209         } catch (final IOException ex) {
210             fail();
211         } catch (final IllegalArgumentException ex) {
212         }
213         try {
214             StringEscapeUtils.UNESCAPE_ECMASCRIPT.translate("", null);
215             fail();
216         } catch (final IOException ex) {
217             fail();
218         } catch (final IllegalArgumentException ex) {
219         }
220 
221         assertEquals("He didn't say, \"stop!\"", StringEscapeUtils.unescapeEcmaScript("He didn\\'t say, \\\"stop!\\\""));
222         assertEquals("document.getElementById(\"test\").value = '<script>alert('aaa');</script>';",
223                 StringEscapeUtils.unescapeEcmaScript("document.getElementById(\\\"test\\\").value = \\'<script>alert(\\'aaa\\');<\\/script>\\';"));
224     }
225 
226 
227     // HTML and XML
228     //--------------------------------------------------------------
229 
230     private static final String[][] HTML_ESCAPES = {
231         {"no escaping", "plain text", "plain text"},
232         {"no escaping", "plain text", "plain text"},
233         {"empty string", "", ""},
234         {"null", null, null},
235         {"ampersand", "bread &amp; butter", "bread & butter"},
236         {"quotes", "&quot;bread&quot; &amp; butter", "\"bread\" & butter"},
237         {"final character only", "greater than &gt;", "greater than >"},
238         {"first character only", "&lt; less than", "< less than"},
239         {"apostrophe", "Huntington's chorea", "Huntington's chorea"},
240         {"languages", "English,Fran&ccedil;ais,\u65E5\u672C\u8A9E (nihongo)", "English,Fran\u00E7ais,\u65E5\u672C\u8A9E (nihongo)"},
241         {"8-bit ascii shouldn't number-escape", "\u0080\u009F", "\u0080\u009F"},
242     };
243 
244     @Test
245     public void testEscapeHtml() {
246         for (final String[] element : HTML_ESCAPES) {
247             final String message = element[0];
248             final String expected = element[1];
249             final String original = element[2];
250             assertEquals(message, expected, StringEscapeUtils.escapeHtml4(original));
251             final StringWriter sw = new StringWriter();
252             try {
253                 StringEscapeUtils.ESCAPE_HTML4.translate(original, sw);
254             } catch (final IOException e) {
255             }
256             final String actual = original == null ? null : sw.toString();
257             assertEquals(message, expected, actual);
258         }
259     }
260 
261     @Test
262     public void testUnescapeHtml4() {
263         for (final String[] element : HTML_ESCAPES) {
264             final String message = element[0];
265             final String expected = element[2];
266             final String original = element[1];
267             assertEquals(message, expected, StringEscapeUtils.unescapeHtml4(original));
268 
269             final StringWriter sw = new StringWriter();
270             try {
271                 StringEscapeUtils.UNESCAPE_HTML4.translate(original, sw);
272             } catch (final IOException e) {
273             }
274             final String actual = original == null ? null : sw.toString();
275             assertEquals(message, expected, actual);
276         }
277         // \u00E7 is a cedilla (c with wiggle under)
278         // note that the test string must be 7-bit-clean (Unicode escaped) or else it will compile incorrectly
279         // on some locales
280         assertEquals("funny chars pass through OK", "Fran\u00E7ais", StringEscapeUtils.unescapeHtml4("Fran\u00E7ais"));
281 
282         assertEquals("Hello&;World", StringEscapeUtils.unescapeHtml4("Hello&;World"));
283         assertEquals("Hello&#;World", StringEscapeUtils.unescapeHtml4("Hello&#;World"));
284         assertEquals("Hello&# ;World", StringEscapeUtils.unescapeHtml4("Hello&# ;World"));
285         assertEquals("Hello&##;World", StringEscapeUtils.unescapeHtml4("Hello&##;World"));
286     }
287 
288     @Test
289     public void testUnescapeHexCharsHtml() {
290         // Simple easy to grok test
291         assertEquals("hex number unescape", "\u0080\u009F", StringEscapeUtils.unescapeHtml4("&#x80;&#x9F;"));
292         assertEquals("hex number unescape", "\u0080\u009F", StringEscapeUtils.unescapeHtml4("&#X80;&#X9F;"));
293         // Test all Character values:
294         for (char i = Character.MIN_VALUE; i < Character.MAX_VALUE; i++) {
295             final Character c1 = new Character(i);
296             final Character c2 = new Character((char)(i+1));
297             final String expected = c1.toString() + c2.toString();
298             final String escapedC1 = "&#x" + Integer.toHexString((c1.charValue())) + ";";
299             final String escapedC2 = "&#x" + Integer.toHexString((c2.charValue())) + ";";
300             assertEquals("hex number unescape index " + (int)i, expected, StringEscapeUtils.unescapeHtml4(escapedC1 + escapedC2));
301         }
302     }
303 
304     @Test
305     public void testUnescapeUnknownEntity() throws Exception {
306         assertEquals("&zzzz;", StringEscapeUtils.unescapeHtml4("&zzzz;"));
307     }
308 
309     @Test
310     public void testEscapeHtmlVersions() throws Exception {
311         assertEquals("&Beta;", StringEscapeUtils.escapeHtml4("\u0392"));
312         assertEquals("\u0392", StringEscapeUtils.unescapeHtml4("&Beta;"));
313 
314         // TODO: refine API for escaping/unescaping specific HTML versions
315     }
316 
317     @Test
318     public void testEscapeXml() throws Exception {
319         assertEquals("&lt;abc&gt;", StringEscapeUtils.escapeXml("<abc>"));
320         assertEquals("<abc>", StringEscapeUtils.unescapeXml("&lt;abc&gt;"));
321 
322         assertEquals("XML should not escape >0x7f values",
323                 "\u00A1", StringEscapeUtils.escapeXml("\u00A1"));
324         assertEquals("XML should be able to unescape >0x7f values",
325                 "\u00A0", StringEscapeUtils.unescapeXml("&#160;"));
326         assertEquals("XML should be able to unescape >0x7f values with one leading 0",
327                 "\u00A0", StringEscapeUtils.unescapeXml("&#0160;"));
328         assertEquals("XML should be able to unescape >0x7f values with two leading 0s",
329                 "\u00A0", StringEscapeUtils.unescapeXml("&#00160;"));
330         assertEquals("XML should be able to unescape >0x7f values with three leading 0s",
331                 "\u00A0", StringEscapeUtils.unescapeXml("&#000160;"));
332 
333         assertEquals("ain't", StringEscapeUtils.unescapeXml("ain&apos;t"));
334         assertEquals("ain&apos;t", StringEscapeUtils.escapeXml("ain't"));
335         assertEquals("", StringEscapeUtils.escapeXml(""));
336         assertEquals(null, StringEscapeUtils.escapeXml(null));
337         assertEquals(null, StringEscapeUtils.unescapeXml(null));
338 
339         StringWriter sw = new StringWriter();
340         try {
341             StringEscapeUtils.ESCAPE_XML.translate("<abc>", sw);
342         } catch (final IOException e) {
343         }
344         assertEquals("XML was escaped incorrectly", "&lt;abc&gt;", sw.toString() );
345 
346         sw = new StringWriter();
347         try {
348             StringEscapeUtils.UNESCAPE_XML.translate("&lt;abc&gt;", sw);
349         } catch (final IOException e) {
350         }
351         assertEquals("XML was unescaped incorrectly", "<abc>", sw.toString() );
352     }
353 
354     @Test
355     public void testEscapeXml10() throws Exception {
356         assertEquals("a&lt;b&gt;c&quot;d&apos;e&amp;f", StringEscapeUtils.escapeXml10("a<b>c\"d'e&f"));
357         assertEquals("XML 1.0 should not escape \t \n \r",
358                 "a\tb\rc\nd", StringEscapeUtils.escapeXml10("a\tb\rc\nd"));
359         assertEquals("XML 1.0 should omit most #x0-x8 | #xb | #xc | #xe-#x19",
360                 "ab", StringEscapeUtils.escapeXml10("a\u0000\u0001\u0008\u000b\u000c\u000e\u001fb"));
361         assertEquals("XML 1.0 should omit #xd800-#xdfff",
362                 "a\ud7ff  \ue000b", StringEscapeUtils.escapeXml10("a\ud7ff\ud800 \udfff \ue000b"));
363         assertEquals("XML 1.0 should omit #xfffe | #xffff",
364                 "a\ufffdb", StringEscapeUtils.escapeXml10("a\ufffd\ufffe\uffffb"));
365         assertEquals("XML 1.0 should escape #x7f-#x84 | #x86 - #x9f, for XML 1.1 compatibility",
366                 "a\u007e&#127;&#132;\u0085&#134;&#159;\u00a0b", StringEscapeUtils.escapeXml10("a\u007e\u007f\u0084\u0085\u0086\u009f\u00a0b"));
367     }
368 
369     @Test
370     public void testEscapeXml11() throws Exception {
371         assertEquals("a&lt;b&gt;c&quot;d&apos;e&amp;f", StringEscapeUtils.escapeXml11("a<b>c\"d'e&f"));
372         assertEquals("XML 1.1 should not escape \t \n \r",
373                 "a\tb\rc\nd", StringEscapeUtils.escapeXml11("a\tb\rc\nd"));
374         assertEquals("XML 1.1 should omit #x0",
375                 "ab", StringEscapeUtils.escapeXml11("a\u0000b"));
376         assertEquals("XML 1.1 should escape #x1-x8 | #xb | #xc | #xe-#x19",
377                 "a&#1;&#8;&#11;&#12;&#14;&#31;b", StringEscapeUtils.escapeXml11("a\u0001\u0008\u000b\u000c\u000e\u001fb"));
378         assertEquals("XML 1.1 should escape #x7F-#x84 | #x86-#x9F",
379                 "a\u007e&#127;&#132;\u0085&#134;&#159;\u00a0b", StringEscapeUtils.escapeXml11("a\u007e\u007f\u0084\u0085\u0086\u009f\u00a0b"));
380         assertEquals("XML 1.1 should omit #xd800-#xdfff",
381                 "a\ud7ff  \ue000b", StringEscapeUtils.escapeXml11("a\ud7ff\ud800 \udfff \ue000b"));
382         assertEquals("XML 1.1 should omit #xfffe | #xffff",
383                 "a\ufffdb", StringEscapeUtils.escapeXml11("a\ufffd\ufffe\uffffb"));
384     }
385 
386     /**
387      * Tests Supplementary characters.
388      * <p>
389      * From http://www.w3.org/International/questions/qa-escapes
390      * </p>
391      * <blockquote>
392      * Supplementary characters are those Unicode characters that have code points higher than the characters in
393      * the Basic Multilingual Plane (BMP). In UTF-16 a supplementary character is encoded using two 16-bit surrogate code points from the
394      * BMP. Because of this, some people think that supplementary characters need to be represented using two escapes, but this is incorrect
395      * - you must use the single, code point value for that character. For example, use &amp;&#35;x233B4&#59; rather than
396      * &amp;&#35;xD84C&#59;&amp;&#35;xDFB4&#59;.
397      * </blockquote>
398      * @see <a href="http://www.w3.org/International/questions/qa-escapes">Using character escapes in markup and CSS</a>
399      * @see <a href="https://issues.apache.org/jira/browse/LANG-728">LANG-728</a>
400      */
401     @Test
402     public void testEscapeXmlSupplementaryCharacters() {
403         final CharSequenceTranslator escapeXml =
404             StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE) );
405 
406         assertEquals("Supplementary character must be represented using a single escape", "&#144308;",
407                 escapeXml.translate("\uD84C\uDFB4"));
408 
409         assertEquals("Supplementary characters mixed with basic characters should be encoded correctly", "a b c &#144308;",
410                         escapeXml.translate("a b c \uD84C\uDFB4"));
411     }
412 
413     @Test
414     public void testEscapeXmlAllCharacters() {
415         // http://www.w3.org/TR/xml/#charsets says:
416         // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] /* any Unicode character,
417         // excluding the surrogate blocks, FFFE, and FFFF. */
418         final CharSequenceTranslator escapeXml = StringEscapeUtils.ESCAPE_XML
419                 .with(NumericEntityEscaper.below(9), NumericEntityEscaper.between(0xB, 0xC), NumericEntityEscaper.between(0xE, 0x19),
420                         NumericEntityEscaper.between(0xD800, 0xDFFF), NumericEntityEscaper.between(0xFFFE, 0xFFFF), NumericEntityEscaper.above(0x110000));
421 
422         assertEquals("&#0;&#1;&#2;&#3;&#4;&#5;&#6;&#7;&#8;", escapeXml.translate("\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008"));
423         assertEquals("\t", escapeXml.translate("\t")); // 0x9
424         assertEquals("\n", escapeXml.translate("\n")); // 0xA
425         assertEquals("&#11;&#12;", escapeXml.translate("\u000B\u000C"));
426         assertEquals("\r", escapeXml.translate("\r")); // 0xD
427         assertEquals("Hello World! Ain&apos;t this great?", escapeXml.translate("Hello World! Ain't this great?"));
428         assertEquals("&#14;&#15;&#24;&#25;", escapeXml.translate("\u000E\u000F\u0018\u0019"));
429     }
430 
431     /**
432      * Reverse of the above.
433      *
434      * @see <a href="https://issues.apache.org/jira/browse/LANG-729">LANG-729</a>
435      */
436     @Test
437     public void testUnescapeXmlSupplementaryCharacters() {
438         assertEquals("Supplementary character must be represented using a single escape", "\uD84C\uDFB4",
439                 StringEscapeUtils.unescapeXml("&#144308;") );
440 
441         assertEquals("Supplementary characters mixed with basic characters should be decoded correctly", "a b c \uD84C\uDFB4",
442                 StringEscapeUtils.unescapeXml("a b c &#144308;") );
443     }
444 
445     // Tests issue #38569
446     // http://issues.apache.org/bugzilla/show_bug.cgi?id=38569
447     @Test
448     public void testStandaloneAmphersand() {
449         assertEquals("<P&O>", StringEscapeUtils.unescapeHtml4("&lt;P&O&gt;"));
450         assertEquals("test & <", StringEscapeUtils.unescapeHtml4("test & &lt;"));
451         assertEquals("<P&O>", StringEscapeUtils.unescapeXml("&lt;P&O&gt;"));
452         assertEquals("test & <", StringEscapeUtils.unescapeXml("test & &lt;"));
453     }
454 
455     @Test
456     public void testLang313() {
457         assertEquals("& &", StringEscapeUtils.unescapeHtml4("& &amp;"));
458     }
459 
460     @Test
461     public void testEscapeCsvString() throws Exception {
462         assertEquals("foo.bar",            StringEscapeUtils.escapeCsv("foo.bar"));
463         assertEquals("\"foo,bar\"",        StringEscapeUtils.escapeCsv("foo,bar"));
464         assertEquals("\"foo\nbar\"",       StringEscapeUtils.escapeCsv("foo\nbar"));
465         assertEquals("\"foo\rbar\"",       StringEscapeUtils.escapeCsv("foo\rbar"));
466         assertEquals("\"foo\"\"bar\"",     StringEscapeUtils.escapeCsv("foo\"bar"));
467         assertEquals("foo\uD84C\uDFB4bar", StringEscapeUtils.escapeCsv("foo\uD84C\uDFB4bar"));
468         assertEquals("",   StringEscapeUtils.escapeCsv(""));
469         assertEquals(null, StringEscapeUtils.escapeCsv(null));
470     }
471 
472     @Test
473     public void testEscapeCsvWriter() throws Exception {
474         checkCsvEscapeWriter("foo.bar",            "foo.bar");
475         checkCsvEscapeWriter("\"foo,bar\"",        "foo,bar");
476         checkCsvEscapeWriter("\"foo\nbar\"",       "foo\nbar");
477         checkCsvEscapeWriter("\"foo\rbar\"",       "foo\rbar");
478         checkCsvEscapeWriter("\"foo\"\"bar\"",     "foo\"bar");
479         checkCsvEscapeWriter("foo\uD84C\uDFB4bar", "foo\uD84C\uDFB4bar");
480         checkCsvEscapeWriter("", null);
481         checkCsvEscapeWriter("", "");
482     }
483 
484     private void checkCsvEscapeWriter(final String expected, final String value) {
485         try {
486             final StringWriter writer = new StringWriter();
487             StringEscapeUtils.ESCAPE_CSV.translate(value, writer);
488             assertEquals(expected, writer.toString());
489         } catch (final IOException e) {
490             fail("Threw: " + e);
491         }
492     }
493 
494     @Test(expected = IllegalStateException.class)
495     public void testEscapeCsvIllegalStateException() throws IOException {
496         final StringWriter writer = new StringWriter();
497         StringEscapeUtils.ESCAPE_CSV.translate("foo", -1, writer);
498     }
499 
500     @Test
501     public void testUnescapeCsvString() throws Exception {
502         assertEquals("foo.bar",              StringEscapeUtils.unescapeCsv("foo.bar"));
503         assertEquals("foo,bar",              StringEscapeUtils.unescapeCsv("\"foo,bar\""));
504         assertEquals("foo\nbar",             StringEscapeUtils.unescapeCsv("\"foo\nbar\""));
505         assertEquals("foo\rbar",             StringEscapeUtils.unescapeCsv("\"foo\rbar\""));
506         assertEquals("foo\"bar",             StringEscapeUtils.unescapeCsv("\"foo\"\"bar\""));
507         assertEquals("foo\uD84C\uDFB4bar",   StringEscapeUtils.unescapeCsv("foo\uD84C\uDFB4bar"));
508         assertEquals("",   StringEscapeUtils.unescapeCsv(""));
509         assertEquals(null, StringEscapeUtils.unescapeCsv(null));
510 
511         assertEquals("\"foo.bar\"",          StringEscapeUtils.unescapeCsv("\"foo.bar\""));
512     }
513 
514     @Test
515     public void testUnescapeCsvWriter() throws Exception {
516         checkCsvUnescapeWriter("foo.bar",            "foo.bar");
517         checkCsvUnescapeWriter("foo,bar",            "\"foo,bar\"");
518         checkCsvUnescapeWriter("foo\nbar",           "\"foo\nbar\"");
519         checkCsvUnescapeWriter("foo\rbar",           "\"foo\rbar\"");
520         checkCsvUnescapeWriter("foo\"bar",           "\"foo\"\"bar\"");
521         checkCsvUnescapeWriter("foo\uD84C\uDFB4bar", "foo\uD84C\uDFB4bar");
522         checkCsvUnescapeWriter("", null);
523         checkCsvUnescapeWriter("", "");
524 
525         checkCsvUnescapeWriter("\"foo.bar\"",        "\"foo.bar\"");
526     }
527 
528     private void checkCsvUnescapeWriter(final String expected, final String value) {
529         try {
530             final StringWriter writer = new StringWriter();
531             StringEscapeUtils.UNESCAPE_CSV.translate(value, writer);
532             assertEquals(expected, writer.toString());
533         } catch (final IOException e) {
534             fail("Threw: " + e);
535         }
536     }
537 
538     @Test(expected = IllegalStateException.class)
539         public void testUnescapeCsvIllegalStateException() throws IOException {
540         final StringWriter writer = new StringWriter();
541         StringEscapeUtils.UNESCAPE_CSV.translate("foo", -1, writer);
542     }
543 
544     /**
545      * Tests // https://issues.apache.org/jira/browse/LANG-480
546      */
547     @Test
548     public void testEscapeHtmlHighUnicode() {
549         // this is the utf8 representation of the character:
550         // COUNTING ROD UNIT DIGIT THREE
551         // in Unicode
552         // codepoint: U+1D362
553         final byte[] data = new byte[] { (byte)0xF0, (byte)0x9D, (byte)0x8D, (byte)0xA2 };
554 
555         final String original = new String(data, Charset.forName("UTF8"));
556 
557         final String escaped = StringEscapeUtils.escapeHtml4( original );
558         assertEquals( "High Unicode should not have been escaped", original, escaped);
559 
560         final String unescaped = StringEscapeUtils.unescapeHtml4( escaped );
561         assertEquals( "High Unicode should have been unchanged", original, unescaped);
562 
563 // TODO: I think this should hold, needs further investigation
564 //        String unescapedFromEntity = StringEscapeUtils.unescapeHtml4( "&#119650;" );
565 //        assertEquals( "High Unicode should have been unescaped", original, unescapedFromEntity);
566     }
567 
568     /**
569      * Tests https://issues.apache.org/jira/browse/LANG-339
570      */
571     @Test
572     public void testEscapeHiragana() {
573         // Some random Japanese Unicode characters
574         final String original = "\u304B\u304C\u3068";
575         final String escaped = StringEscapeUtils.escapeHtml4(original);
576         assertEquals( "Hiragana character Unicode behaviour should not be being escaped by escapeHtml4",
577         original, escaped);
578 
579         final String unescaped = StringEscapeUtils.unescapeHtml4( escaped );
580 
581         assertEquals( "Hiragana character Unicode behaviour has changed - expected no unescaping", escaped, unescaped);
582     }
583 
584     /**
585      * Tests https://issues.apache.org/jira/browse/LANG-708
586      *
587      * @throws IOException
588      *             if an I/O error occurs
589      */
590     @Test
591     public void testLang708() throws IOException {
592         final byte[] inputBytes = Files.readAllBytes(Paths.get("src/test/resources/lang-708-input.txt"));
593         final String input = new String(inputBytes, StandardCharsets.UTF_8);
594         final String escaped = StringEscapeUtils.escapeEcmaScript(input);
595         // just the end:
596         assertTrue(escaped, escaped.endsWith("}]"));
597         // a little more:
598         assertTrue(escaped, escaped.endsWith("\"valueCode\\\":\\\"\\\"}]"));
599     }
600 
601     /**
602      * Tests https://issues.apache.org/jira/browse/LANG-720
603      */
604     @Test
605     public void testLang720() {
606         final String input = "\ud842\udfb7" + "A";
607         final String escaped = StringEscapeUtils.escapeXml(input);
608         assertEquals(input, escaped);
609     }
610 
611     /**
612      * Tests https://issues.apache.org/jira/browse/LANG-911
613      */
614     @Test
615     public void testLang911() {
616         final String bellsTest = "\ud83d\udc80\ud83d\udd14";
617         final String value = StringEscapeUtils.escapeJava(bellsTest);
618         final String valueTest = StringEscapeUtils.unescapeJava(value);
619         assertEquals(bellsTest, valueTest);
620     }
621 
622     @Test
623     public void testEscapeJson() {
624         assertEquals(null, StringEscapeUtils.escapeJson(null));
625         try {
626             StringEscapeUtils.ESCAPE_JSON.translate(null, null);
627             fail();
628         } catch (final IOException ex) {
629             fail();
630         } catch (final IllegalArgumentException ex) {
631         }
632         try {
633             StringEscapeUtils.ESCAPE_JSON.translate("", null);
634             fail();
635         } catch (final IOException ex) {
636             fail();
637         } catch (final IllegalArgumentException ex) {
638         }
639 
640         assertEquals("He didn't say, \\\"stop!\\\"", StringEscapeUtils.escapeJson("He didn't say, \"stop!\""));
641 
642         final String expected = "\\\"foo\\\" isn't \\\"bar\\\". specials: \\b\\r\\n\\f\\t\\\\\\/";
643         final String input ="\"foo\" isn't \"bar\". specials: \b\r\n\f\t\\/";
644 
645         assertEquals(expected, StringEscapeUtils.escapeJson(input));
646     }
647 
648     @Test
649     public void testUnescapeJson() {
650         assertEquals(null, StringEscapeUtils.unescapeJson(null));
651         try {
652             StringEscapeUtils.UNESCAPE_JSON.translate(null, null);
653             fail();
654         } catch (final IOException ex) {
655             fail();
656         } catch (final IllegalArgumentException ex) {
657         }
658         try {
659             StringEscapeUtils.UNESCAPE_JSON.translate("", null);
660             fail();
661         } catch (final IOException ex) {
662             fail();
663         } catch (final IllegalArgumentException ex) {
664         }
665 
666         assertEquals("He didn't say, \"stop!\"", StringEscapeUtils.unescapeJson("He didn't say, \\\"stop!\\\""));
667 
668         final String expected ="\"foo\" isn't \"bar\". specials: \b\r\n\f\t\\/";
669         final String input = "\\\"foo\\\" isn't \\\"bar\\\". specials: \\b\\r\\n\\f\\t\\\\\\/";
670 
671         assertEquals(expected, StringEscapeUtils.unescapeJson(input));
672     }
673 }