View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.lang3;
18  
19  import static org.junit.Assert.assertEquals;
20  import static org.junit.Assert.assertFalse;
21  import static org.junit.Assert.assertNotNull;
22  import static org.junit.Assert.assertTrue;
23  import static org.junit.Assert.fail;
24  
25  import java.io.FileInputStream;
26  import java.io.IOException;
27  import java.io.StringWriter;
28  import java.lang.reflect.Constructor;
29  import java.lang.reflect.Modifier;
30  
31  import org.apache.commons.io.IOUtils;
32  import org.apache.commons.lang3.text.translate.CharSequenceTranslator;
33  import org.apache.commons.lang3.text.translate.NumericEntityEscaper;
34  import org.junit.Test;
35  
36  /**
37   * Unit tests for {@link StringEscapeUtils}.
38   *
39   * @version $Id: StringEscapeUtilsTest.java 1585287 2014-04-06 11:24:03Z britter $
40   */
41  public class StringEscapeUtilsTest {
42      private final static String FOO = "foo";
43  
44      @Test
45      public void testConstructor() {
46          assertNotNull(new StringEscapeUtils());
47          final Constructor<?>[] cons = StringEscapeUtils.class.getDeclaredConstructors();
48          assertEquals(1, cons.length);
49          assertTrue(Modifier.isPublic(cons[0].getModifiers()));
50          assertTrue(Modifier.isPublic(StringEscapeUtils.class.getModifiers()));
51          assertFalse(Modifier.isFinal(StringEscapeUtils.class.getModifiers()));
52      }
53      
54      @Test
55      public void testEscapeJava() throws IOException {
56          assertEquals(null, StringEscapeUtils.escapeJava(null));
57          try {
58              StringEscapeUtils.ESCAPE_JAVA.translate(null, null);
59              fail();
60          } catch (final IOException ex) {
61              fail();
62          } catch (final IllegalArgumentException ex) {
63          }
64          try {
65              StringEscapeUtils.ESCAPE_JAVA.translate("", null);
66              fail();
67          } catch (final IOException ex) {
68              fail();
69          } catch (final IllegalArgumentException ex) {
70          }
71          
72          assertEscapeJava("empty string", "", "");
73          assertEscapeJava(FOO, FOO);
74          assertEscapeJava("tab", "\\t", "\t");
75          assertEscapeJava("backslash", "\\\\", "\\");
76          assertEscapeJava("single quote should not be escaped", "'", "'");
77          assertEscapeJava("\\\\\\b\\t\\r", "\\\b\t\r");
78          assertEscapeJava("\\u1234", "\u1234");
79          assertEscapeJava("\\u0234", "\u0234");
80          assertEscapeJava("\\u00EF", "\u00ef");
81          assertEscapeJava("\\u0001", "\u0001");
82          assertEscapeJava("Should use capitalized Unicode hex", "\\uABCD", "\uabcd");
83  
84          assertEscapeJava("He didn't say, \\\"stop!\\\"",
85                  "He didn't say, \"stop!\"");
86          assertEscapeJava("non-breaking space", "This space is non-breaking:" + "\\u00A0",
87                  "This space is non-breaking:\u00a0");
88          assertEscapeJava("\\uABCD\\u1234\\u012C",
89                  "\uABCD\u1234\u012C");
90      }
91  
92      /**
93       * Tests https://issues.apache.org/jira/browse/LANG-421
94       */
95      @Test
96      public void testEscapeJavaWithSlash() {
97          final String input = "String with a slash (/) in it";
98  
99          final String expected = input;
100         final String actual = StringEscapeUtils.escapeJava(input);
101 
102         /**
103          * In 2.4 StringEscapeUtils.escapeJava(String) escapes '/' characters, which are not a valid character to escape
104          * in a Java string.
105          */
106         assertEquals(expected, actual);
107     }
108     
109     private void assertEscapeJava(final String escaped, final String original) throws IOException {
110         assertEscapeJava(null, escaped, original);
111     }
112 
113     private void assertEscapeJava(String message, final String expected, final String original) throws IOException {
114         final String converted = StringEscapeUtils.escapeJava(original);
115         message = "escapeJava(String) failed" + (message == null ? "" : (": " + message));
116         assertEquals(message, expected, converted);
117 
118         final StringWriter writer = new StringWriter();
119         StringEscapeUtils.ESCAPE_JAVA.translate(original, writer);
120         assertEquals(expected, writer.toString());
121     }
122 
123     @Test
124     public void testUnescapeJava() throws IOException {
125         assertEquals(null, StringEscapeUtils.unescapeJava(null));
126         try {
127             StringEscapeUtils.UNESCAPE_JAVA.translate(null, null);
128             fail();
129         } catch (final IOException ex) {
130             fail();
131         } catch (final IllegalArgumentException ex) {
132         }
133         try {
134             StringEscapeUtils.UNESCAPE_JAVA.translate("", null);
135             fail();
136         } catch (final IOException ex) {
137             fail();
138         } catch (final IllegalArgumentException ex) {
139         }
140         try {
141             StringEscapeUtils.unescapeJava("\\u02-3");
142             fail();
143         } catch (final RuntimeException ex) {
144         }
145         
146         assertUnescapeJava("", "");
147         assertUnescapeJava("test", "test");
148         assertUnescapeJava("\ntest\b", "\\ntest\\b");
149         assertUnescapeJava("\u123425foo\ntest\b", "\\u123425foo\\ntest\\b");
150         assertUnescapeJava("'\foo\teste\r", "\\'\\foo\\teste\\r");
151         assertUnescapeJava("", "\\");
152         //foo
153         assertUnescapeJava("lowercase Unicode", "\uABCDx", "\\uabcdx");
154         assertUnescapeJava("uppercase Unicode", "\uABCDx", "\\uABCDx");
155         assertUnescapeJava("Unicode as final character", "\uABCD", "\\uabcd");
156     }
157 
158     private void assertUnescapeJava(final String unescaped, final String original) throws IOException {
159         assertUnescapeJava(null, unescaped, original);
160     }
161 
162     private void assertUnescapeJava(final String message, final String unescaped, final String original) throws IOException {
163         final String expected = unescaped;
164         final String actual = StringEscapeUtils.unescapeJava(original);
165 
166         assertEquals("unescape(String) failed" +
167                 (message == null ? "" : (": " + message)) +
168                 ": expected '" + StringEscapeUtils.escapeJava(expected) +
169                 // we escape this so we can see it in the error message
170                 "' actual '" + StringEscapeUtils.escapeJava(actual) + "'",
171                 expected, actual);
172 
173         final StringWriter writer = new StringWriter();
174         StringEscapeUtils.UNESCAPE_JAVA.translate(original, writer);
175         assertEquals(unescaped, writer.toString());
176 
177     }
178 
179     @Test
180     public void testEscapeEcmaScript() {
181         assertEquals(null, StringEscapeUtils.escapeEcmaScript(null));
182         try {
183             StringEscapeUtils.ESCAPE_ECMASCRIPT.translate(null, null);
184             fail();
185         } catch (final IOException ex) {
186             fail();
187         } catch (final IllegalArgumentException ex) {
188         }
189         try {
190             StringEscapeUtils.ESCAPE_ECMASCRIPT.translate("", null);
191             fail();
192         } catch (final IOException ex) {
193             fail();
194         } catch (final IllegalArgumentException ex) {
195         }
196         
197         assertEquals("He didn\\'t say, \\\"stop!\\\"", StringEscapeUtils.escapeEcmaScript("He didn't say, \"stop!\""));
198         assertEquals("document.getElementById(\\\"test\\\").value = \\'<script>alert(\\'aaa\\');<\\/script>\\';", 
199                 StringEscapeUtils.escapeEcmaScript("document.getElementById(\"test\").value = '<script>alert('aaa');</script>';"));
200     }
201 
202 
203     // HTML and XML
204     //--------------------------------------------------------------
205 
206     private static final String[][] HTML_ESCAPES = {
207         {"no escaping", "plain text", "plain text"},
208         {"no escaping", "plain text", "plain text"},
209         {"empty string", "", ""},
210         {"null", null, null},
211         {"ampersand", "bread &amp; butter", "bread & butter"},
212         {"quotes", "&quot;bread&quot; &amp; butter", "\"bread\" & butter"},
213         {"final character only", "greater than &gt;", "greater than >"},
214         {"first character only", "&lt; less than", "< less than"},
215         {"apostrophe", "Huntington's chorea", "Huntington's chorea"},
216         {"languages", "English,Fran&ccedil;ais,\u65E5\u672C\u8A9E (nihongo)", "English,Fran\u00E7ais,\u65E5\u672C\u8A9E (nihongo)"},
217         {"8-bit ascii shouldn't number-escape", "\u0080\u009F", "\u0080\u009F"},
218     };
219 
220     @Test
221     public void testEscapeHtml() {
222         for (String[] element : HTML_ESCAPES) {
223             final String message = element[0];
224             final String expected = element[1];
225             final String original = element[2];
226             assertEquals(message, expected, StringEscapeUtils.escapeHtml4(original));
227             final StringWriter sw = new StringWriter();
228             try {
229                 StringEscapeUtils.ESCAPE_HTML4.translate(original, sw);
230             } catch (final IOException e) {
231             }
232             final String actual = original == null ? null : sw.toString();
233             assertEquals(message, expected, actual);
234         }
235     }
236 
237     @Test
238     public void testUnescapeHtml4() {
239         for (String[] element : HTML_ESCAPES) {
240             final String message = element[0];
241             final String expected = element[2];
242             final String original = element[1];
243             assertEquals(message, expected, StringEscapeUtils.unescapeHtml4(original));
244             
245             final StringWriter sw = new StringWriter();
246             try {
247                 StringEscapeUtils.UNESCAPE_HTML4.translate(original, sw);
248             } catch (final IOException e) {
249             }
250             final String actual = original == null ? null : sw.toString();
251             assertEquals(message, expected, actual);
252         }
253         // \u00E7 is a cedilla (c with wiggle under)
254         // note that the test string must be 7-bit-clean (Unicode escaped) or else it will compile incorrectly
255         // on some locales        
256         assertEquals("funny chars pass through OK", "Fran\u00E7ais", StringEscapeUtils.unescapeHtml4("Fran\u00E7ais"));
257         
258         assertEquals("Hello&;World", StringEscapeUtils.unescapeHtml4("Hello&;World"));
259         assertEquals("Hello&#;World", StringEscapeUtils.unescapeHtml4("Hello&#;World"));
260         assertEquals("Hello&# ;World", StringEscapeUtils.unescapeHtml4("Hello&# ;World"));
261         assertEquals("Hello&##;World", StringEscapeUtils.unescapeHtml4("Hello&##;World"));
262     }
263 
264     @Test
265     public void testUnescapeHexCharsHtml() {
266         // Simple easy to grok test 
267         assertEquals("hex number unescape", "\u0080\u009F", StringEscapeUtils.unescapeHtml4("&#x80;&#x9F;"));
268         assertEquals("hex number unescape", "\u0080\u009F", StringEscapeUtils.unescapeHtml4("&#X80;&#X9F;"));
269         // Test all Character values:
270         for (char i = Character.MIN_VALUE; i < Character.MAX_VALUE; i++) {
271             final Character c1 = new Character(i);
272             final Character c2 = new Character((char)(i+1));
273             final String expected = c1.toString() + c2.toString();
274             final String escapedC1 = "&#x" + Integer.toHexString((c1.charValue())) + ";";
275             final String escapedC2 = "&#x" + Integer.toHexString((c2.charValue())) + ";";
276             assertEquals("hex number unescape index " + (int)i, expected, StringEscapeUtils.unescapeHtml4(escapedC1 + escapedC2));
277         }
278     }
279 
280     @Test
281     public void testUnescapeUnknownEntity() throws Exception {
282         assertEquals("&zzzz;", StringEscapeUtils.unescapeHtml4("&zzzz;"));
283     }
284 
285     @Test
286     public void testEscapeHtmlVersions() throws Exception {
287         assertEquals("&Beta;", StringEscapeUtils.escapeHtml4("\u0392"));
288         assertEquals("\u0392", StringEscapeUtils.unescapeHtml4("&Beta;"));
289 
290         // TODO: refine API for escaping/unescaping specific HTML versions
291     }
292 
293     @Test
294     @SuppressWarnings( "deprecation" ) // ESCAPE_XML has been replaced by ESCAPE_XML10 and ESCAPE_XML11 in 3.3
295     public void testEscapeXml() throws Exception {
296         assertEquals("&lt;abc&gt;", StringEscapeUtils.escapeXml("<abc>"));
297         assertEquals("<abc>", StringEscapeUtils.unescapeXml("&lt;abc&gt;"));
298 
299         assertEquals("XML should not escape >0x7f values",
300                 "\u00A1", StringEscapeUtils.escapeXml("\u00A1"));
301         assertEquals("XML should be able to unescape >0x7f values",
302                 "\u00A0", StringEscapeUtils.unescapeXml("&#160;"));
303         assertEquals("XML should be able to unescape >0x7f values with one leading 0",
304                 "\u00A0", StringEscapeUtils.unescapeXml("&#0160;"));
305         assertEquals("XML should be able to unescape >0x7f values with two leading 0s",
306                 "\u00A0", StringEscapeUtils.unescapeXml("&#00160;"));
307         assertEquals("XML should be able to unescape >0x7f values with three leading 0s",
308                 "\u00A0", StringEscapeUtils.unescapeXml("&#000160;"));
309 
310         assertEquals("ain't", StringEscapeUtils.unescapeXml("ain&apos;t"));
311         assertEquals("ain&apos;t", StringEscapeUtils.escapeXml("ain't"));
312         assertEquals("", StringEscapeUtils.escapeXml(""));
313         assertEquals(null, StringEscapeUtils.escapeXml(null));
314         assertEquals(null, StringEscapeUtils.unescapeXml(null));
315 
316         StringWriter sw = new StringWriter();
317         try {
318             StringEscapeUtils.ESCAPE_XML.translate("<abc>", sw);
319         } catch (final IOException e) {
320         }
321         assertEquals("XML was escaped incorrectly", "&lt;abc&gt;", sw.toString() );
322 
323         sw = new StringWriter();
324         try {
325             StringEscapeUtils.UNESCAPE_XML.translate("&lt;abc&gt;", sw);
326         } catch (final IOException e) {
327         }
328         assertEquals("XML was unescaped incorrectly", "<abc>", sw.toString() );
329     }
330     
331     @Test
332     public void testEscapeXml10() throws Exception {
333         assertEquals("a&lt;b&gt;c&quot;d&apos;e&amp;f", StringEscapeUtils.escapeXml10("a<b>c\"d'e&f"));
334         assertEquals("XML 1.0 should not escape \t \n \r",
335                 "a\tb\rc\nd", StringEscapeUtils.escapeXml10("a\tb\rc\nd"));
336         assertEquals("XML 1.0 should omit most #x0-x8 | #xb | #xc | #xe-#x19",
337                 "ab", StringEscapeUtils.escapeXml10("a\u0000\u0001\u0008\u000b\u000c\u000e\u001fb"));
338         assertEquals("XML 1.0 should omit #xd800-#xdfff",
339                 "a\ud7ff  \ue000b", StringEscapeUtils.escapeXml10("a\ud7ff\ud800 \udfff \ue000b"));
340         assertEquals("XML 1.0 should omit #xfffe | #xffff",
341                 "a\ufffdb", StringEscapeUtils.escapeXml10("a\ufffd\ufffe\uffffb"));
342         assertEquals("XML 1.0 should escape #x7f-#x84 | #x86 - #x9f, for XML 1.1 compatibility",
343                 "a\u007e&#127;&#132;\u0085&#134;&#159;\u00a0b", StringEscapeUtils.escapeXml10("a\u007e\u007f\u0084\u0085\u0086\u009f\u00a0b"));
344     }
345     
346     @Test
347     public void testEscapeXml11() throws Exception {
348         assertEquals("a&lt;b&gt;c&quot;d&apos;e&amp;f", StringEscapeUtils.escapeXml11("a<b>c\"d'e&f"));
349         assertEquals("XML 1.1 should not escape \t \n \r",
350                 "a\tb\rc\nd", StringEscapeUtils.escapeXml11("a\tb\rc\nd"));
351         assertEquals("XML 1.1 should omit #x0",
352                 "ab", StringEscapeUtils.escapeXml11("a\u0000b"));
353         assertEquals("XML 1.1 should escape #x1-x8 | #xb | #xc | #xe-#x19",
354                 "a&#1;&#8;&#11;&#12;&#14;&#31;b", StringEscapeUtils.escapeXml11("a\u0001\u0008\u000b\u000c\u000e\u001fb"));
355         assertEquals("XML 1.1 should escape #x7F-#x84 | #x86-#x9F",
356                 "a\u007e&#127;&#132;\u0085&#134;&#159;\u00a0b", StringEscapeUtils.escapeXml11("a\u007e\u007f\u0084\u0085\u0086\u009f\u00a0b"));
357         assertEquals("XML 1.1 should omit #xd800-#xdfff",
358                 "a\ud7ff  \ue000b", StringEscapeUtils.escapeXml11("a\ud7ff\ud800 \udfff \ue000b"));
359         assertEquals("XML 1.1 should omit #xfffe | #xffff",
360                 "a\ufffdb", StringEscapeUtils.escapeXml11("a\ufffd\ufffe\uffffb"));
361     }
362 
363     /**
364      * Tests Supplementary characters. 
365      * <p>
366      * From http://www.w3.org/International/questions/qa-escapes
367      * </p>
368      * <blockquote>
369      * Supplementary characters are those Unicode characters that have code points higher than the characters in
370      * the Basic Multilingual Plane (BMP). In UTF-16 a supplementary character is encoded using two 16-bit surrogate code points from the
371      * BMP. Because of this, some people think that supplementary characters need to be represented using two escapes, but this is incorrect
372      * - you must use the single, code point value for that character. For example, use &amp;&#35;x233B4&#59; rather than
373      * &amp;&#35;xD84C&#59;&amp;&#35;xDFB4&#59;.
374      * </blockquote>
375      * @see <a href="http://www.w3.org/International/questions/qa-escapes">Using character escapes in markup and CSS</a>
376      * @see <a href="https://issues.apache.org/jira/browse/LANG-728">LANG-728</a>
377      */
378     @Test
379     @SuppressWarnings( "deprecation" ) // ESCAPE_XML has been replaced by ESCAPE_XML10 and ESCAPE_XML11 in 3.3
380     public void testEscapeXmlSupplementaryCharacters() {
381         final CharSequenceTranslator escapeXml = 
382             StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE) );
383 
384         assertEquals("Supplementary character must be represented using a single escape", "&#144308;",
385                 escapeXml.translate("\uD84C\uDFB4"));
386 
387         assertEquals("Supplementary characters mixed with basic characters should be encoded correctly", "a b c &#144308;",
388                         escapeXml.translate("a b c \uD84C\uDFB4"));
389     }
390     
391     @Test
392     @SuppressWarnings( "deprecation" ) // ESCAPE_XML has been replaced by ESCAPE_XML10 and ESCAPE_XML11 in 3.3
393     public void testEscapeXmlAllCharacters() {
394         // http://www.w3.org/TR/xml/#charsets says:
395         // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] /* any Unicode character,
396         // excluding the surrogate blocks, FFFE, and FFFF. */
397         final CharSequenceTranslator escapeXml = StringEscapeUtils.ESCAPE_XML
398                 .with(NumericEntityEscaper.below(9), NumericEntityEscaper.between(0xB, 0xC), NumericEntityEscaper.between(0xE, 0x19),
399                         NumericEntityEscaper.between(0xD800, 0xDFFF), NumericEntityEscaper.between(0xFFFE, 0xFFFF), NumericEntityEscaper.above(0x110000));
400 
401         assertEquals("&#0;&#1;&#2;&#3;&#4;&#5;&#6;&#7;&#8;", escapeXml.translate("\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008"));
402         assertEquals("\t", escapeXml.translate("\t")); // 0x9
403         assertEquals("\n", escapeXml.translate("\n")); // 0xA
404         assertEquals("&#11;&#12;", escapeXml.translate("\u000B\u000C"));
405         assertEquals("\r", escapeXml.translate("\r")); // 0xD
406         assertEquals("Hello World! Ain&apos;t this great?", escapeXml.translate("Hello World! Ain't this great?"));
407         assertEquals("&#14;&#15;&#24;&#25;", escapeXml.translate("\u000E\u000F\u0018\u0019"));
408     }
409     
410     /**
411      * Reverse of the above.
412      *
413      * @see <a href="https://issues.apache.org/jira/browse/LANG-729">LANG-729</a>
414      */
415     @Test
416     public void testUnescapeXmlSupplementaryCharacters() {
417         assertEquals("Supplementary character must be represented using a single escape", "\uD84C\uDFB4",
418                 StringEscapeUtils.unescapeXml("&#144308;") );
419 
420         assertEquals("Supplementary characters mixed with basic characters should be decoded correctly", "a b c \uD84C\uDFB4",
421                 StringEscapeUtils.unescapeXml("a b c &#144308;") );
422     }
423         
424     // Tests issue #38569
425     // http://issues.apache.org/bugzilla/show_bug.cgi?id=38569
426     @Test
427     public void testStandaloneAmphersand() {
428         assertEquals("<P&O>", StringEscapeUtils.unescapeHtml4("&lt;P&O&gt;"));
429         assertEquals("test & <", StringEscapeUtils.unescapeHtml4("test & &lt;"));
430         assertEquals("<P&O>", StringEscapeUtils.unescapeXml("&lt;P&O&gt;"));
431         assertEquals("test & <", StringEscapeUtils.unescapeXml("test & &lt;"));
432     }
433 
434     @Test
435     public void testLang313() {
436         assertEquals("& &", StringEscapeUtils.unescapeHtml4("& &amp;"));
437     }
438 
439     @Test
440     public void testEscapeCsvString() throws Exception {
441         assertEquals("foo.bar",            StringEscapeUtils.escapeCsv("foo.bar"));
442         assertEquals("\"foo,bar\"",        StringEscapeUtils.escapeCsv("foo,bar"));
443         assertEquals("\"foo\nbar\"",       StringEscapeUtils.escapeCsv("foo\nbar"));
444         assertEquals("\"foo\rbar\"",       StringEscapeUtils.escapeCsv("foo\rbar"));
445         assertEquals("\"foo\"\"bar\"",     StringEscapeUtils.escapeCsv("foo\"bar"));
446         assertEquals("foo\uD84C\uDFB4bar", StringEscapeUtils.escapeCsv("foo\uD84C\uDFB4bar"));
447         assertEquals("",   StringEscapeUtils.escapeCsv(""));
448         assertEquals(null, StringEscapeUtils.escapeCsv(null));
449     }
450 
451     @Test
452     public void testEscapeCsvWriter() throws Exception {
453         checkCsvEscapeWriter("foo.bar",            "foo.bar");
454         checkCsvEscapeWriter("\"foo,bar\"",        "foo,bar");
455         checkCsvEscapeWriter("\"foo\nbar\"",       "foo\nbar");
456         checkCsvEscapeWriter("\"foo\rbar\"",       "foo\rbar");
457         checkCsvEscapeWriter("\"foo\"\"bar\"",     "foo\"bar");
458         checkCsvEscapeWriter("foo\uD84C\uDFB4bar", "foo\uD84C\uDFB4bar");
459         checkCsvEscapeWriter("", null);
460         checkCsvEscapeWriter("", "");
461     }
462 
463     private void checkCsvEscapeWriter(final String expected, final String value) {
464         try {
465             final StringWriter writer = new StringWriter();
466             StringEscapeUtils.ESCAPE_CSV.translate(value, writer);
467             assertEquals(expected, writer.toString());
468         } catch (final IOException e) {
469             fail("Threw: " + e);
470         }
471     }
472 
473     @Test
474     public void testUnescapeCsvString() throws Exception {
475         assertEquals("foo.bar",              StringEscapeUtils.unescapeCsv("foo.bar"));
476         assertEquals("foo,bar",              StringEscapeUtils.unescapeCsv("\"foo,bar\""));
477         assertEquals("foo\nbar",             StringEscapeUtils.unescapeCsv("\"foo\nbar\""));
478         assertEquals("foo\rbar",             StringEscapeUtils.unescapeCsv("\"foo\rbar\""));
479         assertEquals("foo\"bar",             StringEscapeUtils.unescapeCsv("\"foo\"\"bar\""));
480         assertEquals("foo\uD84C\uDFB4bar",   StringEscapeUtils.unescapeCsv("foo\uD84C\uDFB4bar"));
481         assertEquals("",   StringEscapeUtils.unescapeCsv(""));
482         assertEquals(null, StringEscapeUtils.unescapeCsv(null));
483 
484         assertEquals("\"foo.bar\"",          StringEscapeUtils.unescapeCsv("\"foo.bar\""));
485     }
486 
487     @Test
488     public void testUnescapeCsvWriter() throws Exception {
489         checkCsvUnescapeWriter("foo.bar",            "foo.bar");
490         checkCsvUnescapeWriter("foo,bar",            "\"foo,bar\"");
491         checkCsvUnescapeWriter("foo\nbar",           "\"foo\nbar\"");
492         checkCsvUnescapeWriter("foo\rbar",           "\"foo\rbar\"");
493         checkCsvUnescapeWriter("foo\"bar",           "\"foo\"\"bar\"");
494         checkCsvUnescapeWriter("foo\uD84C\uDFB4bar", "foo\uD84C\uDFB4bar");
495         checkCsvUnescapeWriter("", null);
496         checkCsvUnescapeWriter("", "");
497 
498         checkCsvUnescapeWriter("\"foo.bar\"",        "\"foo.bar\"");
499     }
500 
501     private void checkCsvUnescapeWriter(final String expected, final String value) {
502         try {
503             final StringWriter writer = new StringWriter();
504             StringEscapeUtils.UNESCAPE_CSV.translate(value, writer);
505             assertEquals(expected, writer.toString());
506         } catch (final IOException e) {
507             fail("Threw: " + e);
508         }
509     }
510 
511     /**
512      * Tests // https://issues.apache.org/jira/browse/LANG-480
513      * 
514      * @throws java.io.UnsupportedEncodingException
515      */
516     @Test
517     public void testEscapeHtmlHighUnicode() throws java.io.UnsupportedEncodingException {
518         // this is the utf8 representation of the character:
519         // COUNTING ROD UNIT DIGIT THREE
520         // in Unicode
521         // codepoint: U+1D362
522         final byte[] data = new byte[] { (byte)0xF0, (byte)0x9D, (byte)0x8D, (byte)0xA2 };
523 
524         final String original = new String(data, "UTF8");
525 
526         final String escaped = StringEscapeUtils.escapeHtml4( original );
527         assertEquals( "High Unicode should not have been escaped", original, escaped);
528 
529         final String unescaped = StringEscapeUtils.unescapeHtml4( escaped );
530         assertEquals( "High Unicode should have been unchanged", original, unescaped);
531 
532 // TODO: I think this should hold, needs further investigation
533 //        String unescapedFromEntity = StringEscapeUtils.unescapeHtml4( "&#119650;" );
534 //        assertEquals( "High Unicode should have been unescaped", original, unescapedFromEntity);
535     }
536 
537     /**
538      * Tests https://issues.apache.org/jira/browse/LANG-339
539      */
540     @Test
541     public void testEscapeHiragana() {
542         // Some random Japanese Unicode characters
543         final String original = "\u304B\u304C\u3068";
544         final String escaped = StringEscapeUtils.escapeHtml4(original);
545         assertEquals( "Hiragana character Unicode behaviour should not be being escaped by escapeHtml4",
546         original, escaped);
547 
548         final String unescaped = StringEscapeUtils.unescapeHtml4( escaped );
549 
550         assertEquals( "Hiragana character Unicode behaviour has changed - expected no unescaping", escaped, unescaped);
551     }
552 
553     /**
554      * Tests https://issues.apache.org/jira/browse/LANG-708
555      * 
556      * @throws IOException
557      *             if an I/O error occurs
558      */
559     @Test
560     public void testLang708() throws IOException {
561         final FileInputStream fis = new FileInputStream("src/test/resources/lang-708-input.txt");
562         final String input = IOUtils.toString(fis, "UTF-8");
563         final String escaped = StringEscapeUtils.escapeEcmaScript(input);
564         // just the end:
565         assertTrue(escaped, escaped.endsWith("}]"));
566         // a little more:
567         assertTrue(escaped, escaped.endsWith("\"valueCode\\\":\\\"\\\"}]"));
568         fis.close();
569     }
570 
571     /**
572      * Tests https://issues.apache.org/jira/browse/LANG-720
573      */
574     @Test
575     @SuppressWarnings( "deprecation" ) // escapeXml(String) has been replaced by escapeXml10(String) and escapeXml11(String) in 3.3
576     public void testLang720() {
577         final String input = new StringBuilder("\ud842\udfb7").append("A").toString();
578         final String escaped = StringEscapeUtils.escapeXml(input);
579         assertEquals(input, escaped);
580     }
581 
582     /**
583      * Tests https://issues.apache.org/jira/browse/LANG-911
584      */
585     @Test
586     public void testLang911() {
587         String bellsTest = "\ud83d\udc80\ud83d\udd14";
588         String value = StringEscapeUtils.escapeJava(bellsTest);
589         String valueTest = StringEscapeUtils.unescapeJava(value);
590         assertEquals(bellsTest, valueTest);
591     }
592 
593     @Test
594     public void testEscapeJson() {
595         assertEquals(null, StringEscapeUtils.escapeJson(null));
596         try {
597             StringEscapeUtils.ESCAPE_JSON.translate(null, null);
598             fail();
599         } catch (final IOException ex) {
600             fail();
601         } catch (final IllegalArgumentException ex) {
602         }
603         try {
604             StringEscapeUtils.ESCAPE_JSON.translate("", null);
605             fail();
606         } catch (final IOException ex) {
607             fail();
608         } catch (final IllegalArgumentException ex) {
609         }
610 
611         assertEquals("He didn't say, \\\"stop!\\\"", StringEscapeUtils.escapeJson("He didn't say, \"stop!\""));
612 
613         String expected = "\\\"foo\\\" isn't \\\"bar\\\". specials: \\b\\r\\n\\f\\t\\\\\\/";
614         String input ="\"foo\" isn't \"bar\". specials: \b\r\n\f\t\\/";
615 
616         assertEquals(expected, StringEscapeUtils.escapeJson(input));
617     }
618 
619 }