View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.lang3;
18  
19  import static org.junit.Assert.assertEquals;
20  import static org.junit.Assert.assertFalse;
21  import static org.junit.Assert.assertNotNull;
22  import static org.junit.Assert.assertNull;
23  import static org.junit.Assert.assertTrue;
24  import static org.junit.Assert.fail;
25  
26  import java.io.IOException;
27  import java.io.StringWriter;
28  import java.lang.reflect.Constructor;
29  import java.lang.reflect.Modifier;
30  import java.nio.charset.Charset;
31  import java.nio.charset.StandardCharsets;
32  import java.nio.file.Files;
33  import java.nio.file.Paths;
34  
35  import org.apache.commons.lang3.text.translate.CharSequenceTranslator;
36  import org.apache.commons.lang3.text.translate.NumericEntityEscaper;
37  import org.junit.Test;
38  
39  /**
40   * Unit tests for {@link StringEscapeUtils}.
41   */
42  @Deprecated
43  public class StringEscapeUtilsTest {
44      private static final String FOO = "foo";
45  
46      @Test
47      public void testConstructor() {
48          assertNotNull(new StringEscapeUtils());
49          final Constructor<?>[] cons = StringEscapeUtils.class.getDeclaredConstructors();
50          assertEquals(1, cons.length);
51          assertTrue(Modifier.isPublic(cons[0].getModifiers()));
52          assertTrue(Modifier.isPublic(StringEscapeUtils.class.getModifiers()));
53          assertFalse(Modifier.isFinal(StringEscapeUtils.class.getModifiers()));
54      }
55  
56      @Test
57      public void testEscapeJava() throws IOException {
58          assertNull(StringEscapeUtils.escapeJava(null));
59          try {
60              StringEscapeUtils.ESCAPE_JAVA.translate(null, null);
61              fail();
62          } catch (final IOException ex) {
63              fail();
64          } catch (final IllegalArgumentException ex) {
65          }
66          try {
67              StringEscapeUtils.ESCAPE_JAVA.translate("", null);
68              fail();
69          } catch (final IOException ex) {
70              fail();
71          } catch (final IllegalArgumentException ex) {
72          }
73  
74          assertEscapeJava("empty string", "", "");
75          assertEscapeJava(FOO, FOO);
76          assertEscapeJava("tab", "\\t", "\t");
77          assertEscapeJava("backslash", "\\\\", "\\");
78          assertEscapeJava("single quote should not be escaped", "'", "'");
79          assertEscapeJava("\\\\\\b\\t\\r", "\\\b\t\r");
80          assertEscapeJava("\\u1234", "\u1234");
81          assertEscapeJava("\\u0234", "\u0234");
82          assertEscapeJava("\\u00EF", "\u00ef");
83          assertEscapeJava("\\u0001", "\u0001");
84          assertEscapeJava("Should use capitalized Unicode hex", "\\uABCD", "\uabcd");
85  
86          assertEscapeJava("He didn't say, \\\"stop!\\\"",
87                  "He didn't say, \"stop!\"");
88          assertEscapeJava("non-breaking space", "This space is non-breaking:" + "\\u00A0",
89                  "This space is non-breaking:\u00a0");
90          assertEscapeJava("\\uABCD\\u1234\\u012C",
91                  "\uABCD\u1234\u012C");
92      }
93  
94      /**
95       * Tests https://issues.apache.org/jira/browse/LANG-421
96       */
97      @Test
98      public void testEscapeJavaWithSlash() {
99          final String input = "String with a slash (/) in it";
100 
101         final String expected = input;
102         final String actual = StringEscapeUtils.escapeJava(input);
103 
104         /**
105          * In 2.4 StringEscapeUtils.escapeJava(String) escapes '/' characters, which are not a valid character to escape
106          * in a Java string.
107          */
108         assertEquals(expected, actual);
109     }
110 
111     private void assertEscapeJava(final String escaped, final String original) throws IOException {
112         assertEscapeJava(null, escaped, original);
113     }
114 
115     private void assertEscapeJava(String message, final String expected, final String original) throws IOException {
116         final String converted = StringEscapeUtils.escapeJava(original);
117         message = "escapeJava(String) failed" + (message == null ? "" : (": " + message));
118         assertEquals(message, expected, converted);
119 
120         final StringWriter writer = new StringWriter();
121         StringEscapeUtils.ESCAPE_JAVA.translate(original, writer);
122         assertEquals(expected, writer.toString());
123     }
124 
125     @Test
126     public void testUnescapeJava() throws IOException {
127         assertNull(StringEscapeUtils.unescapeJava(null));
128         try {
129             StringEscapeUtils.UNESCAPE_JAVA.translate(null, null);
130             fail();
131         } catch (final IOException ex) {
132             fail();
133         } catch (final IllegalArgumentException ex) {
134         }
135         try {
136             StringEscapeUtils.UNESCAPE_JAVA.translate("", null);
137             fail();
138         } catch (final IOException ex) {
139             fail();
140         } catch (final IllegalArgumentException ex) {
141         }
142         try {
143             StringEscapeUtils.unescapeJava("\\u02-3");
144             fail();
145         } catch (final RuntimeException ex) {
146         }
147 
148         assertUnescapeJava("", "");
149         assertUnescapeJava("test", "test");
150         assertUnescapeJava("\ntest\b", "\\ntest\\b");
151         assertUnescapeJava("\u123425foo\ntest\b", "\\u123425foo\\ntest\\b");
152         assertUnescapeJava("'\foo\teste\r", "\\'\\foo\\teste\\r");
153         assertUnescapeJava("", "\\");
154         //foo
155         assertUnescapeJava("lowercase Unicode", "\uABCDx", "\\uabcdx");
156         assertUnescapeJava("uppercase Unicode", "\uABCDx", "\\uABCDx");
157         assertUnescapeJava("Unicode as final character", "\uABCD", "\\uabcd");
158     }
159 
160     private void assertUnescapeJava(final String unescaped, final String original) throws IOException {
161         assertUnescapeJava(null, unescaped, original);
162     }
163 
164     private void assertUnescapeJava(final String message, final String unescaped, final String original) throws IOException {
165         final String expected = unescaped;
166         final String actual = StringEscapeUtils.unescapeJava(original);
167 
168         assertEquals("unescape(String) failed" +
169                 (message == null ? "" : (": " + message)) +
170                 ": expected '" + StringEscapeUtils.escapeJava(expected) +
171                 // we escape this so we can see it in the error message
172                 "' actual '" + StringEscapeUtils.escapeJava(actual) + "'",
173                 expected, actual);
174 
175         final StringWriter writer = new StringWriter();
176         StringEscapeUtils.UNESCAPE_JAVA.translate(original, writer);
177         assertEquals(unescaped, writer.toString());
178 
179     }
180 
181     @Test
182     public void testEscapeEcmaScript() {
183         assertNull(StringEscapeUtils.escapeEcmaScript(null));
184         try {
185             StringEscapeUtils.ESCAPE_ECMASCRIPT.translate(null, null);
186             fail();
187         } catch (final IOException ex) {
188             fail();
189         } catch (final IllegalArgumentException ex) {
190         }
191         try {
192             StringEscapeUtils.ESCAPE_ECMASCRIPT.translate("", null);
193             fail();
194         } catch (final IOException ex) {
195             fail();
196         } catch (final IllegalArgumentException ex) {
197         }
198 
199         assertEquals("He didn\\'t say, \\\"stop!\\\"", StringEscapeUtils.escapeEcmaScript("He didn't say, \"stop!\""));
200         assertEquals("document.getElementById(\\\"test\\\").value = \\'<script>alert(\\'aaa\\');<\\/script>\\';",
201                 StringEscapeUtils.escapeEcmaScript("document.getElementById(\"test\").value = '<script>alert('aaa');</script>';"));
202     }
203 
204     @Test
205     public void testUnescapeEcmaScript() {
206         assertNull(StringEscapeUtils.escapeEcmaScript(null));
207         try {
208             StringEscapeUtils.UNESCAPE_ECMASCRIPT.translate(null, null);
209             fail();
210         } catch (final IOException ex) {
211             fail();
212         } catch (final IllegalArgumentException ex) {
213         }
214         try {
215             StringEscapeUtils.UNESCAPE_ECMASCRIPT.translate("", null);
216             fail();
217         } catch (final IOException ex) {
218             fail();
219         } catch (final IllegalArgumentException ex) {
220         }
221 
222         assertEquals("He didn't say, \"stop!\"", StringEscapeUtils.unescapeEcmaScript("He didn\\'t say, \\\"stop!\\\""));
223         assertEquals("document.getElementById(\"test\").value = '<script>alert('aaa');</script>';",
224                 StringEscapeUtils.unescapeEcmaScript("document.getElementById(\\\"test\\\").value = \\'<script>alert(\\'aaa\\');<\\/script>\\';"));
225     }
226 
227 
228     // HTML and XML
229     //--------------------------------------------------------------
230 
231     private static final String[][] HTML_ESCAPES = {
232         {"no escaping", "plain text", "plain text"},
233         {"no escaping", "plain text", "plain text"},
234         {"empty string", "", ""},
235         {"null", null, null},
236         {"ampersand", "bread &amp; butter", "bread & butter"},
237         {"quotes", "&quot;bread&quot; &amp; butter", "\"bread\" & butter"},
238         {"final character only", "greater than &gt;", "greater than >"},
239         {"first character only", "&lt; less than", "< less than"},
240         {"apostrophe", "Huntington's chorea", "Huntington's chorea"},
241         {"languages", "English,Fran&ccedil;ais,\u65E5\u672C\u8A9E (nihongo)", "English,Fran\u00E7ais,\u65E5\u672C\u8A9E (nihongo)"},
242         {"8-bit ascii shouldn't number-escape", "\u0080\u009F", "\u0080\u009F"},
243     };
244 
245     @Test
246     public void testEscapeHtml() {
247         for (final String[] element : HTML_ESCAPES) {
248             final String message = element[0];
249             final String expected = element[1];
250             final String original = element[2];
251             assertEquals(message, expected, StringEscapeUtils.escapeHtml4(original));
252             final StringWriter sw = new StringWriter();
253             try {
254                 StringEscapeUtils.ESCAPE_HTML4.translate(original, sw);
255             } catch (final IOException e) {
256             }
257             final String actual = original == null ? null : sw.toString();
258             assertEquals(message, expected, actual);
259         }
260     }
261 
262     @Test
263     public void testUnescapeHtml4() {
264         for (final String[] element : HTML_ESCAPES) {
265             final String message = element[0];
266             final String expected = element[2];
267             final String original = element[1];
268             assertEquals(message, expected, StringEscapeUtils.unescapeHtml4(original));
269 
270             final StringWriter sw = new StringWriter();
271             try {
272                 StringEscapeUtils.UNESCAPE_HTML4.translate(original, sw);
273             } catch (final IOException e) {
274             }
275             final String actual = original == null ? null : sw.toString();
276             assertEquals(message, expected, actual);
277         }
278         // \u00E7 is a cedilla (c with wiggle under)
279         // note that the test string must be 7-bit-clean (Unicode escaped) or else it will compile incorrectly
280         // on some locales
281         assertEquals("funny chars pass through OK", "Fran\u00E7ais", StringEscapeUtils.unescapeHtml4("Fran\u00E7ais"));
282 
283         assertEquals("Hello&;World", StringEscapeUtils.unescapeHtml4("Hello&;World"));
284         assertEquals("Hello&#;World", StringEscapeUtils.unescapeHtml4("Hello&#;World"));
285         assertEquals("Hello&# ;World", StringEscapeUtils.unescapeHtml4("Hello&# ;World"));
286         assertEquals("Hello&##;World", StringEscapeUtils.unescapeHtml4("Hello&##;World"));
287     }
288 
289     @Test
290     public void testUnescapeHexCharsHtml() {
291         // Simple easy to grok test
292         assertEquals("hex number unescape", "\u0080\u009F", StringEscapeUtils.unescapeHtml4("&#x80;&#x9F;"));
293         assertEquals("hex number unescape", "\u0080\u009F", StringEscapeUtils.unescapeHtml4("&#X80;&#X9F;"));
294         // Test all Character values:
295         for (char i = Character.MIN_VALUE; i < Character.MAX_VALUE; i++) {
296             final Character c1 = new Character(i);
297             final Character c2 = new Character((char)(i+1));
298             final String expected = c1.toString() + c2.toString();
299             final String escapedC1 = "&#x" + Integer.toHexString((c1.charValue())) + ";";
300             final String escapedC2 = "&#x" + Integer.toHexString((c2.charValue())) + ";";
301             assertEquals("hex number unescape index " + (int)i, expected, StringEscapeUtils.unescapeHtml4(escapedC1 + escapedC2));
302         }
303     }
304 
305     @Test
306     public void testUnescapeUnknownEntity() throws Exception {
307         assertEquals("&zzzz;", StringEscapeUtils.unescapeHtml4("&zzzz;"));
308     }
309 
310     @Test
311     public void testEscapeHtmlVersions() throws Exception {
312         assertEquals("&Beta;", StringEscapeUtils.escapeHtml4("\u0392"));
313         assertEquals("\u0392", StringEscapeUtils.unescapeHtml4("&Beta;"));
314 
315         // TODO: refine API for escaping/unescaping specific HTML versions
316     }
317 
318     @Test
319     public void testEscapeXml() throws Exception {
320         assertEquals("&lt;abc&gt;", StringEscapeUtils.escapeXml("<abc>"));
321         assertEquals("<abc>", StringEscapeUtils.unescapeXml("&lt;abc&gt;"));
322 
323         assertEquals("XML should not escape >0x7f values",
324                 "\u00A1", StringEscapeUtils.escapeXml("\u00A1"));
325         assertEquals("XML should be able to unescape >0x7f values",
326                 "\u00A0", StringEscapeUtils.unescapeXml("&#160;"));
327         assertEquals("XML should be able to unescape >0x7f values with one leading 0",
328                 "\u00A0", StringEscapeUtils.unescapeXml("&#0160;"));
329         assertEquals("XML should be able to unescape >0x7f values with two leading 0s",
330                 "\u00A0", StringEscapeUtils.unescapeXml("&#00160;"));
331         assertEquals("XML should be able to unescape >0x7f values with three leading 0s",
332                 "\u00A0", StringEscapeUtils.unescapeXml("&#000160;"));
333 
334         assertEquals("ain't", StringEscapeUtils.unescapeXml("ain&apos;t"));
335         assertEquals("ain&apos;t", StringEscapeUtils.escapeXml("ain't"));
336         assertEquals("", StringEscapeUtils.escapeXml(""));
337         assertNull(StringEscapeUtils.escapeXml(null));
338         assertNull(StringEscapeUtils.unescapeXml(null));
339 
340         StringWriter sw = new StringWriter();
341         try {
342             StringEscapeUtils.ESCAPE_XML.translate("<abc>", sw);
343         } catch (final IOException e) {
344         }
345         assertEquals("XML was escaped incorrectly", "&lt;abc&gt;", sw.toString() );
346 
347         sw = new StringWriter();
348         try {
349             StringEscapeUtils.UNESCAPE_XML.translate("&lt;abc&gt;", sw);
350         } catch (final IOException e) {
351         }
352         assertEquals("XML was unescaped incorrectly", "<abc>", sw.toString() );
353     }
354 
355     @Test
356     public void testEscapeXml10() throws Exception {
357         assertEquals("a&lt;b&gt;c&quot;d&apos;e&amp;f", StringEscapeUtils.escapeXml10("a<b>c\"d'e&f"));
358         assertEquals("XML 1.0 should not escape \t \n \r",
359                 "a\tb\rc\nd", StringEscapeUtils.escapeXml10("a\tb\rc\nd"));
360         assertEquals("XML 1.0 should omit most #x0-x8 | #xb | #xc | #xe-#x19",
361                 "ab", StringEscapeUtils.escapeXml10("a\u0000\u0001\u0008\u000b\u000c\u000e\u001fb"));
362         assertEquals("XML 1.0 should omit #xd800-#xdfff",
363                 "a\ud7ff  \ue000b", StringEscapeUtils.escapeXml10("a\ud7ff\ud800 \udfff \ue000b"));
364         assertEquals("XML 1.0 should omit #xfffe | #xffff",
365                 "a\ufffdb", StringEscapeUtils.escapeXml10("a\ufffd\ufffe\uffffb"));
366         assertEquals("XML 1.0 should escape #x7f-#x84 | #x86 - #x9f, for XML 1.1 compatibility",
367                 "a\u007e&#127;&#132;\u0085&#134;&#159;\u00a0b", StringEscapeUtils.escapeXml10("a\u007e\u007f\u0084\u0085\u0086\u009f\u00a0b"));
368     }
369 
370     @Test
371     public void testEscapeXml11() throws Exception {
372         assertEquals("a&lt;b&gt;c&quot;d&apos;e&amp;f", StringEscapeUtils.escapeXml11("a<b>c\"d'e&f"));
373         assertEquals("XML 1.1 should not escape \t \n \r",
374                 "a\tb\rc\nd", StringEscapeUtils.escapeXml11("a\tb\rc\nd"));
375         assertEquals("XML 1.1 should omit #x0",
376                 "ab", StringEscapeUtils.escapeXml11("a\u0000b"));
377         assertEquals("XML 1.1 should escape #x1-x8 | #xb | #xc | #xe-#x19",
378                 "a&#1;&#8;&#11;&#12;&#14;&#31;b", StringEscapeUtils.escapeXml11("a\u0001\u0008\u000b\u000c\u000e\u001fb"));
379         assertEquals("XML 1.1 should escape #x7F-#x84 | #x86-#x9F",
380                 "a\u007e&#127;&#132;\u0085&#134;&#159;\u00a0b", StringEscapeUtils.escapeXml11("a\u007e\u007f\u0084\u0085\u0086\u009f\u00a0b"));
381         assertEquals("XML 1.1 should omit #xd800-#xdfff",
382                 "a\ud7ff  \ue000b", StringEscapeUtils.escapeXml11("a\ud7ff\ud800 \udfff \ue000b"));
383         assertEquals("XML 1.1 should omit #xfffe | #xffff",
384                 "a\ufffdb", StringEscapeUtils.escapeXml11("a\ufffd\ufffe\uffffb"));
385     }
386 
387     /**
388      * Tests Supplementary characters.
389      * <p>
390      * From http://www.w3.org/International/questions/qa-escapes
391      * </p>
392      * <blockquote>
393      * Supplementary characters are those Unicode characters that have code points higher than the characters in
394      * the Basic Multilingual Plane (BMP). In UTF-16 a supplementary character is encoded using two 16-bit surrogate code points from the
395      * BMP. Because of this, some people think that supplementary characters need to be represented using two escapes, but this is incorrect
396      * - you must use the single, code point value for that character. For example, use &amp;&#35;x233B4&#59; rather than
397      * &amp;&#35;xD84C&#59;&amp;&#35;xDFB4&#59;.
398      * </blockquote>
399      * @see <a href="http://www.w3.org/International/questions/qa-escapes">Using character escapes in markup and CSS</a>
400      * @see <a href="https://issues.apache.org/jira/browse/LANG-728">LANG-728</a>
401      */
402     @Test
403     public void testEscapeXmlSupplementaryCharacters() {
404         final CharSequenceTranslator escapeXml =
405             StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE) );
406 
407         assertEquals("Supplementary character must be represented using a single escape", "&#144308;",
408                 escapeXml.translate("\uD84C\uDFB4"));
409 
410         assertEquals("Supplementary characters mixed with basic characters should be encoded correctly", "a b c &#144308;",
411                         escapeXml.translate("a b c \uD84C\uDFB4"));
412     }
413 
414     @Test
415     public void testEscapeXmlAllCharacters() {
416         // http://www.w3.org/TR/xml/#charsets says:
417         // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] /* any Unicode character,
418         // excluding the surrogate blocks, FFFE, and FFFF. */
419         final CharSequenceTranslator escapeXml = StringEscapeUtils.ESCAPE_XML
420                 .with(NumericEntityEscaper.below(9), NumericEntityEscaper.between(0xB, 0xC), NumericEntityEscaper.between(0xE, 0x19),
421                         NumericEntityEscaper.between(0xD800, 0xDFFF), NumericEntityEscaper.between(0xFFFE, 0xFFFF), NumericEntityEscaper.above(0x110000));
422 
423         assertEquals("&#0;&#1;&#2;&#3;&#4;&#5;&#6;&#7;&#8;", escapeXml.translate("\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008"));
424         assertEquals("\t", escapeXml.translate("\t")); // 0x9
425         assertEquals("\n", escapeXml.translate("\n")); // 0xA
426         assertEquals("&#11;&#12;", escapeXml.translate("\u000B\u000C"));
427         assertEquals("\r", escapeXml.translate("\r")); // 0xD
428         assertEquals("Hello World! Ain&apos;t this great?", escapeXml.translate("Hello World! Ain't this great?"));
429         assertEquals("&#14;&#15;&#24;&#25;", escapeXml.translate("\u000E\u000F\u0018\u0019"));
430     }
431 
432     /**
433      * Reverse of the above.
434      *
435      * @see <a href="https://issues.apache.org/jira/browse/LANG-729">LANG-729</a>
436      */
437     @Test
438     public void testUnescapeXmlSupplementaryCharacters() {
439         assertEquals("Supplementary character must be represented using a single escape", "\uD84C\uDFB4",
440                 StringEscapeUtils.unescapeXml("&#144308;") );
441 
442         assertEquals("Supplementary characters mixed with basic characters should be decoded correctly", "a b c \uD84C\uDFB4",
443                 StringEscapeUtils.unescapeXml("a b c &#144308;") );
444     }
445 
446     // Tests issue #38569
447     // http://issues.apache.org/bugzilla/show_bug.cgi?id=38569
448     @Test
449     public void testStandaloneAmphersand() {
450         assertEquals("<P&O>", StringEscapeUtils.unescapeHtml4("&lt;P&O&gt;"));
451         assertEquals("test & <", StringEscapeUtils.unescapeHtml4("test & &lt;"));
452         assertEquals("<P&O>", StringEscapeUtils.unescapeXml("&lt;P&O&gt;"));
453         assertEquals("test & <", StringEscapeUtils.unescapeXml("test & &lt;"));
454     }
455 
456     @Test
457     public void testLang313() {
458         assertEquals("& &", StringEscapeUtils.unescapeHtml4("& &amp;"));
459     }
460 
461     @Test
462     public void testEscapeCsvString() throws Exception {
463         assertEquals("foo.bar",            StringEscapeUtils.escapeCsv("foo.bar"));
464         assertEquals("\"foo,bar\"",        StringEscapeUtils.escapeCsv("foo,bar"));
465         assertEquals("\"foo\nbar\"",       StringEscapeUtils.escapeCsv("foo\nbar"));
466         assertEquals("\"foo\rbar\"",       StringEscapeUtils.escapeCsv("foo\rbar"));
467         assertEquals("\"foo\"\"bar\"",     StringEscapeUtils.escapeCsv("foo\"bar"));
468         assertEquals("foo\uD84C\uDFB4bar", StringEscapeUtils.escapeCsv("foo\uD84C\uDFB4bar"));
469         assertEquals("",   StringEscapeUtils.escapeCsv(""));
470         assertNull(StringEscapeUtils.escapeCsv(null));
471     }
472 
473     @Test
474     public void testEscapeCsvWriter() throws Exception {
475         checkCsvEscapeWriter("foo.bar",            "foo.bar");
476         checkCsvEscapeWriter("\"foo,bar\"",        "foo,bar");
477         checkCsvEscapeWriter("\"foo\nbar\"",       "foo\nbar");
478         checkCsvEscapeWriter("\"foo\rbar\"",       "foo\rbar");
479         checkCsvEscapeWriter("\"foo\"\"bar\"",     "foo\"bar");
480         checkCsvEscapeWriter("foo\uD84C\uDFB4bar", "foo\uD84C\uDFB4bar");
481         checkCsvEscapeWriter("", null);
482         checkCsvEscapeWriter("", "");
483     }
484 
485     private void checkCsvEscapeWriter(final String expected, final String value) {
486         try {
487             final StringWriter writer = new StringWriter();
488             StringEscapeUtils.ESCAPE_CSV.translate(value, writer);
489             assertEquals(expected, writer.toString());
490         } catch (final IOException e) {
491             fail("Threw: " + e);
492         }
493     }
494 
495     @Test(expected = IllegalStateException.class)
496     public void testEscapeCsvIllegalStateException() throws IOException {
497         final StringWriter writer = new StringWriter();
498         StringEscapeUtils.ESCAPE_CSV.translate("foo", -1, writer);
499     }
500 
501     @Test
502     public void testUnescapeCsvString() throws Exception {
503         assertEquals("foo.bar",              StringEscapeUtils.unescapeCsv("foo.bar"));
504         assertEquals("foo,bar",              StringEscapeUtils.unescapeCsv("\"foo,bar\""));
505         assertEquals("foo\nbar",             StringEscapeUtils.unescapeCsv("\"foo\nbar\""));
506         assertEquals("foo\rbar",             StringEscapeUtils.unescapeCsv("\"foo\rbar\""));
507         assertEquals("foo\"bar",             StringEscapeUtils.unescapeCsv("\"foo\"\"bar\""));
508         assertEquals("foo\uD84C\uDFB4bar",   StringEscapeUtils.unescapeCsv("foo\uD84C\uDFB4bar"));
509         assertEquals("",   StringEscapeUtils.unescapeCsv(""));
510         assertNull(StringEscapeUtils.unescapeCsv(null));
511 
512         assertEquals("\"foo.bar\"",          StringEscapeUtils.unescapeCsv("\"foo.bar\""));
513     }
514 
515     @Test
516     public void testUnescapeCsvWriter() throws Exception {
517         checkCsvUnescapeWriter("foo.bar",            "foo.bar");
518         checkCsvUnescapeWriter("foo,bar",            "\"foo,bar\"");
519         checkCsvUnescapeWriter("foo\nbar",           "\"foo\nbar\"");
520         checkCsvUnescapeWriter("foo\rbar",           "\"foo\rbar\"");
521         checkCsvUnescapeWriter("foo\"bar",           "\"foo\"\"bar\"");
522         checkCsvUnescapeWriter("foo\uD84C\uDFB4bar", "foo\uD84C\uDFB4bar");
523         checkCsvUnescapeWriter("", null);
524         checkCsvUnescapeWriter("", "");
525 
526         checkCsvUnescapeWriter("\"foo.bar\"",        "\"foo.bar\"");
527     }
528 
529     private void checkCsvUnescapeWriter(final String expected, final String value) {
530         try {
531             final StringWriter writer = new StringWriter();
532             StringEscapeUtils.UNESCAPE_CSV.translate(value, writer);
533             assertEquals(expected, writer.toString());
534         } catch (final IOException e) {
535             fail("Threw: " + e);
536         }
537     }
538 
539     @Test(expected = IllegalStateException.class)
540         public void testUnescapeCsvIllegalStateException() throws IOException {
541         final StringWriter writer = new StringWriter();
542         StringEscapeUtils.UNESCAPE_CSV.translate("foo", -1, writer);
543     }
544 
545     /**
546      * Tests // https://issues.apache.org/jira/browse/LANG-480
547      */
548     @Test
549     public void testEscapeHtmlHighUnicode() {
550         // this is the utf8 representation of the character:
551         // COUNTING ROD UNIT DIGIT THREE
552         // in Unicode
553         // codepoint: U+1D362
554         final byte[] data = new byte[] { (byte)0xF0, (byte)0x9D, (byte)0x8D, (byte)0xA2 };
555 
556         final String original = new String(data, Charset.forName("UTF8"));
557 
558         final String escaped = StringEscapeUtils.escapeHtml4( original );
559         assertEquals( "High Unicode should not have been escaped", original, escaped);
560 
561         final String unescaped = StringEscapeUtils.unescapeHtml4( escaped );
562         assertEquals( "High Unicode should have been unchanged", original, unescaped);
563 
564 // TODO: I think this should hold, needs further investigation
565 //        String unescapedFromEntity = StringEscapeUtils.unescapeHtml4( "&#119650;" );
566 //        assertEquals( "High Unicode should have been unescaped", original, unescapedFromEntity);
567     }
568 
569     /**
570      * Tests https://issues.apache.org/jira/browse/LANG-339
571      */
572     @Test
573     public void testEscapeHiragana() {
574         // Some random Japanese Unicode characters
575         final String original = "\u304B\u304C\u3068";
576         final String escaped = StringEscapeUtils.escapeHtml4(original);
577         assertEquals( "Hiragana character Unicode behaviour should not be being escaped by escapeHtml4",
578         original, escaped);
579 
580         final String unescaped = StringEscapeUtils.unescapeHtml4( escaped );
581 
582         assertEquals( "Hiragana character Unicode behaviour has changed - expected no unescaping", escaped, unescaped);
583     }
584 
585     /**
586      * Tests https://issues.apache.org/jira/browse/LANG-708
587      *
588      * @throws IOException
589      *             if an I/O error occurs
590      */
591     @Test
592     public void testLang708() throws IOException {
593         final byte[] inputBytes = Files.readAllBytes(Paths.get("src/test/resources/lang-708-input.txt"));
594         final String input = new String(inputBytes, StandardCharsets.UTF_8);
595         final String escaped = StringEscapeUtils.escapeEcmaScript(input);
596         // just the end:
597         assertTrue(escaped, escaped.endsWith("}]"));
598         // a little more:
599         assertTrue(escaped, escaped.endsWith("\"valueCode\\\":\\\"\\\"}]"));
600     }
601 
602     /**
603      * Tests https://issues.apache.org/jira/browse/LANG-720
604      */
605     @Test
606     public void testLang720() {
607         final String input = "\ud842\udfb7" + "A";
608         final String escaped = StringEscapeUtils.escapeXml(input);
609         assertEquals(input, escaped);
610     }
611 
612     /**
613      * Tests https://issues.apache.org/jira/browse/LANG-911
614      */
615     @Test
616     public void testLang911() {
617         final String bellsTest = "\ud83d\udc80\ud83d\udd14";
618         final String value = StringEscapeUtils.escapeJava(bellsTest);
619         final String valueTest = StringEscapeUtils.unescapeJava(value);
620         assertEquals(bellsTest, valueTest);
621     }
622 
623     @Test
624     public void testEscapeJson() {
625         assertNull(StringEscapeUtils.escapeJson(null));
626         try {
627             StringEscapeUtils.ESCAPE_JSON.translate(null, null);
628             fail();
629         } catch (final IOException ex) {
630             fail();
631         } catch (final IllegalArgumentException ex) {
632         }
633         try {
634             StringEscapeUtils.ESCAPE_JSON.translate("", null);
635             fail();
636         } catch (final IOException ex) {
637             fail();
638         } catch (final IllegalArgumentException ex) {
639         }
640 
641         assertEquals("He didn't say, \\\"stop!\\\"", StringEscapeUtils.escapeJson("He didn't say, \"stop!\""));
642 
643         final String expected = "\\\"foo\\\" isn't \\\"bar\\\". specials: \\b\\r\\n\\f\\t\\\\\\/";
644         final String input ="\"foo\" isn't \"bar\". specials: \b\r\n\f\t\\/";
645 
646         assertEquals(expected, StringEscapeUtils.escapeJson(input));
647     }
648 
649     @Test
650     public void testUnescapeJson() {
651         assertNull(StringEscapeUtils.unescapeJson(null));
652         try {
653             StringEscapeUtils.UNESCAPE_JSON.translate(null, null);
654             fail();
655         } catch (final IOException ex) {
656             fail();
657         } catch (final IllegalArgumentException ex) {
658         }
659         try {
660             StringEscapeUtils.UNESCAPE_JSON.translate("", null);
661             fail();
662         } catch (final IOException ex) {
663             fail();
664         } catch (final IllegalArgumentException ex) {
665         }
666 
667         assertEquals("He didn't say, \"stop!\"", StringEscapeUtils.unescapeJson("He didn't say, \\\"stop!\\\""));
668 
669         final String expected ="\"foo\" isn't \"bar\". specials: \b\r\n\f\t\\/";
670         final String input = "\\\"foo\\\" isn't \\\"bar\\\". specials: \\b\\r\\n\\f\\t\\\\\\/";
671 
672         assertEquals(expected, StringEscapeUtils.unescapeJson(input));
673     }
674 }