View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.lang3;
18  
19  import static org.junit.Assert.assertEquals;
20  import static org.junit.Assert.assertFalse;
21  import static org.junit.Assert.assertNotNull;
22  import static org.junit.Assert.assertTrue;
23  import static org.junit.Assert.fail;
24  
25  import java.io.FileInputStream;
26  import java.io.IOException;
27  import java.io.StringWriter;
28  import java.lang.reflect.Constructor;
29  import java.lang.reflect.Modifier;
30  import java.nio.charset.Charset;
31  
32  import org.apache.commons.io.IOUtils;
33  import org.apache.commons.lang3.text.translate.CharSequenceTranslator;
34  import org.apache.commons.lang3.text.translate.NumericEntityEscaper;
35  import org.junit.Test;
36  
37  /**
38   * Unit tests for {@link StringEscapeUtils}.
39   */
40  public class StringEscapeUtilsTest {
41      private final static String FOO = "foo";
42  
43      @Test
44      public void testConstructor() {
45          assertNotNull(new StringEscapeUtils());
46          final Constructor<?>[] cons = StringEscapeUtils.class.getDeclaredConstructors();
47          assertEquals(1, cons.length);
48          assertTrue(Modifier.isPublic(cons[0].getModifiers()));
49          assertTrue(Modifier.isPublic(StringEscapeUtils.class.getModifiers()));
50          assertFalse(Modifier.isFinal(StringEscapeUtils.class.getModifiers()));
51      }
52      
53      @Test
54      public void testEscapeJava() throws IOException {
55          assertEquals(null, StringEscapeUtils.escapeJava(null));
56          try {
57              StringEscapeUtils.ESCAPE_JAVA.translate(null, null);
58              fail();
59          } catch (final IOException ex) {
60              fail();
61          } catch (final IllegalArgumentException ex) {
62          }
63          try {
64              StringEscapeUtils.ESCAPE_JAVA.translate("", null);
65              fail();
66          } catch (final IOException ex) {
67              fail();
68          } catch (final IllegalArgumentException ex) {
69          }
70          
71          assertEscapeJava("empty string", "", "");
72          assertEscapeJava(FOO, FOO);
73          assertEscapeJava("tab", "\\t", "\t");
74          assertEscapeJava("backslash", "\\\\", "\\");
75          assertEscapeJava("single quote should not be escaped", "'", "'");
76          assertEscapeJava("\\\\\\b\\t\\r", "\\\b\t\r");
77          assertEscapeJava("\\u1234", "\u1234");
78          assertEscapeJava("\\u0234", "\u0234");
79          assertEscapeJava("\\u00EF", "\u00ef");
80          assertEscapeJava("\\u0001", "\u0001");
81          assertEscapeJava("Should use capitalized Unicode hex", "\\uABCD", "\uabcd");
82  
83          assertEscapeJava("He didn't say, \\\"stop!\\\"",
84                  "He didn't say, \"stop!\"");
85          assertEscapeJava("non-breaking space", "This space is non-breaking:" + "\\u00A0",
86                  "This space is non-breaking:\u00a0");
87          assertEscapeJava("\\uABCD\\u1234\\u012C",
88                  "\uABCD\u1234\u012C");
89      }
90  
91      /**
92       * Tests https://issues.apache.org/jira/browse/LANG-421
93       */
94      @Test
95      public void testEscapeJavaWithSlash() {
96          final String input = "String with a slash (/) in it";
97  
98          final String expected = input;
99          final String actual = StringEscapeUtils.escapeJava(input);
100 
101         /**
102          * In 2.4 StringEscapeUtils.escapeJava(String) escapes '/' characters, which are not a valid character to escape
103          * in a Java string.
104          */
105         assertEquals(expected, actual);
106     }
107     
108     private void assertEscapeJava(final String escaped, final String original) throws IOException {
109         assertEscapeJava(null, escaped, original);
110     }
111 
112     private void assertEscapeJava(String message, final String expected, final String original) throws IOException {
113         final String converted = StringEscapeUtils.escapeJava(original);
114         message = "escapeJava(String) failed" + (message == null ? "" : (": " + message));
115         assertEquals(message, expected, converted);
116 
117         final StringWriter writer = new StringWriter();
118         StringEscapeUtils.ESCAPE_JAVA.translate(original, writer);
119         assertEquals(expected, writer.toString());
120     }
121 
122     @Test
123     public void testUnescapeJava() throws IOException {
124         assertEquals(null, StringEscapeUtils.unescapeJava(null));
125         try {
126             StringEscapeUtils.UNESCAPE_JAVA.translate(null, null);
127             fail();
128         } catch (final IOException ex) {
129             fail();
130         } catch (final IllegalArgumentException ex) {
131         }
132         try {
133             StringEscapeUtils.UNESCAPE_JAVA.translate("", null);
134             fail();
135         } catch (final IOException ex) {
136             fail();
137         } catch (final IllegalArgumentException ex) {
138         }
139         try {
140             StringEscapeUtils.unescapeJava("\\u02-3");
141             fail();
142         } catch (final RuntimeException ex) {
143         }
144         
145         assertUnescapeJava("", "");
146         assertUnescapeJava("test", "test");
147         assertUnescapeJava("\ntest\b", "\\ntest\\b");
148         assertUnescapeJava("\u123425foo\ntest\b", "\\u123425foo\\ntest\\b");
149         assertUnescapeJava("'\foo\teste\r", "\\'\\foo\\teste\\r");
150         assertUnescapeJava("", "\\");
151         //foo
152         assertUnescapeJava("lowercase Unicode", "\uABCDx", "\\uabcdx");
153         assertUnescapeJava("uppercase Unicode", "\uABCDx", "\\uABCDx");
154         assertUnescapeJava("Unicode as final character", "\uABCD", "\\uabcd");
155     }
156 
157     private void assertUnescapeJava(final String unescaped, final String original) throws IOException {
158         assertUnescapeJava(null, unescaped, original);
159     }
160 
161     private void assertUnescapeJava(final String message, final String unescaped, final String original) throws IOException {
162         final String expected = unescaped;
163         final String actual = StringEscapeUtils.unescapeJava(original);
164 
165         assertEquals("unescape(String) failed" +
166                 (message == null ? "" : (": " + message)) +
167                 ": expected '" + StringEscapeUtils.escapeJava(expected) +
168                 // we escape this so we can see it in the error message
169                 "' actual '" + StringEscapeUtils.escapeJava(actual) + "'",
170                 expected, actual);
171 
172         final StringWriter writer = new StringWriter();
173         StringEscapeUtils.UNESCAPE_JAVA.translate(original, writer);
174         assertEquals(unescaped, writer.toString());
175 
176     }
177 
178     @Test
179     public void testEscapeEcmaScript() {
180         assertEquals(null, StringEscapeUtils.escapeEcmaScript(null));
181         try {
182             StringEscapeUtils.ESCAPE_ECMASCRIPT.translate(null, null);
183             fail();
184         } catch (final IOException ex) {
185             fail();
186         } catch (final IllegalArgumentException ex) {
187         }
188         try {
189             StringEscapeUtils.ESCAPE_ECMASCRIPT.translate("", null);
190             fail();
191         } catch (final IOException ex) {
192             fail();
193         } catch (final IllegalArgumentException ex) {
194         }
195         
196         assertEquals("He didn\\'t say, \\\"stop!\\\"", StringEscapeUtils.escapeEcmaScript("He didn't say, \"stop!\""));
197         assertEquals("document.getElementById(\\\"test\\\").value = \\'<script>alert(\\'aaa\\');<\\/script>\\';", 
198                 StringEscapeUtils.escapeEcmaScript("document.getElementById(\"test\").value = '<script>alert('aaa');</script>';"));
199     }
200 
201 
202     // HTML and XML
203     //--------------------------------------------------------------
204 
205     private static final String[][] HTML_ESCAPES = {
206         {"no escaping", "plain text", "plain text"},
207         {"no escaping", "plain text", "plain text"},
208         {"empty string", "", ""},
209         {"null", null, null},
210         {"ampersand", "bread &amp; butter", "bread & butter"},
211         {"quotes", "&quot;bread&quot; &amp; butter", "\"bread\" & butter"},
212         {"final character only", "greater than &gt;", "greater than >"},
213         {"first character only", "&lt; less than", "< less than"},
214         {"apostrophe", "Huntington's chorea", "Huntington's chorea"},
215         {"languages", "English,Fran&ccedil;ais,\u65E5\u672C\u8A9E (nihongo)", "English,Fran\u00E7ais,\u65E5\u672C\u8A9E (nihongo)"},
216         {"8-bit ascii shouldn't number-escape", "\u0080\u009F", "\u0080\u009F"},
217     };
218 
219     @Test
220     public void testEscapeHtml() {
221         for (final String[] element : HTML_ESCAPES) {
222             final String message = element[0];
223             final String expected = element[1];
224             final String original = element[2];
225             assertEquals(message, expected, StringEscapeUtils.escapeHtml4(original));
226             final StringWriter sw = new StringWriter();
227             try {
228                 StringEscapeUtils.ESCAPE_HTML4.translate(original, sw);
229             } catch (final IOException e) {
230             }
231             final String actual = original == null ? null : sw.toString();
232             assertEquals(message, expected, actual);
233         }
234     }
235 
236     @Test
237     public void testUnescapeHtml4() {
238         for (final String[] element : HTML_ESCAPES) {
239             final String message = element[0];
240             final String expected = element[2];
241             final String original = element[1];
242             assertEquals(message, expected, StringEscapeUtils.unescapeHtml4(original));
243             
244             final StringWriter sw = new StringWriter();
245             try {
246                 StringEscapeUtils.UNESCAPE_HTML4.translate(original, sw);
247             } catch (final IOException e) {
248             }
249             final String actual = original == null ? null : sw.toString();
250             assertEquals(message, expected, actual);
251         }
252         // \u00E7 is a cedilla (c with wiggle under)
253         // note that the test string must be 7-bit-clean (Unicode escaped) or else it will compile incorrectly
254         // on some locales        
255         assertEquals("funny chars pass through OK", "Fran\u00E7ais", StringEscapeUtils.unescapeHtml4("Fran\u00E7ais"));
256         
257         assertEquals("Hello&;World", StringEscapeUtils.unescapeHtml4("Hello&;World"));
258         assertEquals("Hello&#;World", StringEscapeUtils.unescapeHtml4("Hello&#;World"));
259         assertEquals("Hello&# ;World", StringEscapeUtils.unescapeHtml4("Hello&# ;World"));
260         assertEquals("Hello&##;World", StringEscapeUtils.unescapeHtml4("Hello&##;World"));
261     }
262 
263     @Test
264     public void testUnescapeHexCharsHtml() {
265         // Simple easy to grok test 
266         assertEquals("hex number unescape", "\u0080\u009F", StringEscapeUtils.unescapeHtml4("&#x80;&#x9F;"));
267         assertEquals("hex number unescape", "\u0080\u009F", StringEscapeUtils.unescapeHtml4("&#X80;&#X9F;"));
268         // Test all Character values:
269         for (char i = Character.MIN_VALUE; i < Character.MAX_VALUE; i++) {
270             final Character c1 = new Character(i);
271             final Character c2 = new Character((char)(i+1));
272             final String expected = c1.toString() + c2.toString();
273             final String escapedC1 = "&#x" + Integer.toHexString((c1.charValue())) + ";";
274             final String escapedC2 = "&#x" + Integer.toHexString((c2.charValue())) + ";";
275             assertEquals("hex number unescape index " + (int)i, expected, StringEscapeUtils.unescapeHtml4(escapedC1 + escapedC2));
276         }
277     }
278 
279     @Test
280     public void testUnescapeUnknownEntity() throws Exception {
281         assertEquals("&zzzz;", StringEscapeUtils.unescapeHtml4("&zzzz;"));
282     }
283 
284     @Test
285     public void testEscapeHtmlVersions() throws Exception {
286         assertEquals("&Beta;", StringEscapeUtils.escapeHtml4("\u0392"));
287         assertEquals("\u0392", StringEscapeUtils.unescapeHtml4("&Beta;"));
288 
289         // TODO: refine API for escaping/unescaping specific HTML versions
290     }
291 
292     @Test
293     @SuppressWarnings( "deprecation" ) // ESCAPE_XML has been replaced by ESCAPE_XML10 and ESCAPE_XML11 in 3.3
294     public void testEscapeXml() throws Exception {
295         assertEquals("&lt;abc&gt;", StringEscapeUtils.escapeXml("<abc>"));
296         assertEquals("<abc>", StringEscapeUtils.unescapeXml("&lt;abc&gt;"));
297 
298         assertEquals("XML should not escape >0x7f values",
299                 "\u00A1", StringEscapeUtils.escapeXml("\u00A1"));
300         assertEquals("XML should be able to unescape >0x7f values",
301                 "\u00A0", StringEscapeUtils.unescapeXml("&#160;"));
302         assertEquals("XML should be able to unescape >0x7f values with one leading 0",
303                 "\u00A0", StringEscapeUtils.unescapeXml("&#0160;"));
304         assertEquals("XML should be able to unescape >0x7f values with two leading 0s",
305                 "\u00A0", StringEscapeUtils.unescapeXml("&#00160;"));
306         assertEquals("XML should be able to unescape >0x7f values with three leading 0s",
307                 "\u00A0", StringEscapeUtils.unescapeXml("&#000160;"));
308 
309         assertEquals("ain't", StringEscapeUtils.unescapeXml("ain&apos;t"));
310         assertEquals("ain&apos;t", StringEscapeUtils.escapeXml("ain't"));
311         assertEquals("", StringEscapeUtils.escapeXml(""));
312         assertEquals(null, StringEscapeUtils.escapeXml(null));
313         assertEquals(null, StringEscapeUtils.unescapeXml(null));
314 
315         StringWriter sw = new StringWriter();
316         try {
317             StringEscapeUtils.ESCAPE_XML.translate("<abc>", sw);
318         } catch (final IOException e) {
319         }
320         assertEquals("XML was escaped incorrectly", "&lt;abc&gt;", sw.toString() );
321 
322         sw = new StringWriter();
323         try {
324             StringEscapeUtils.UNESCAPE_XML.translate("&lt;abc&gt;", sw);
325         } catch (final IOException e) {
326         }
327         assertEquals("XML was unescaped incorrectly", "<abc>", sw.toString() );
328     }
329     
330     @Test
331     public void testEscapeXml10() throws Exception {
332         assertEquals("a&lt;b&gt;c&quot;d&apos;e&amp;f", StringEscapeUtils.escapeXml10("a<b>c\"d'e&f"));
333         assertEquals("XML 1.0 should not escape \t \n \r",
334                 "a\tb\rc\nd", StringEscapeUtils.escapeXml10("a\tb\rc\nd"));
335         assertEquals("XML 1.0 should omit most #x0-x8 | #xb | #xc | #xe-#x19",
336                 "ab", StringEscapeUtils.escapeXml10("a\u0000\u0001\u0008\u000b\u000c\u000e\u001fb"));
337         assertEquals("XML 1.0 should omit #xd800-#xdfff",
338                 "a\ud7ff  \ue000b", StringEscapeUtils.escapeXml10("a\ud7ff\ud800 \udfff \ue000b"));
339         assertEquals("XML 1.0 should omit #xfffe | #xffff",
340                 "a\ufffdb", StringEscapeUtils.escapeXml10("a\ufffd\ufffe\uffffb"));
341         assertEquals("XML 1.0 should escape #x7f-#x84 | #x86 - #x9f, for XML 1.1 compatibility",
342                 "a\u007e&#127;&#132;\u0085&#134;&#159;\u00a0b", StringEscapeUtils.escapeXml10("a\u007e\u007f\u0084\u0085\u0086\u009f\u00a0b"));
343     }
344     
345     @Test
346     public void testEscapeXml11() throws Exception {
347         assertEquals("a&lt;b&gt;c&quot;d&apos;e&amp;f", StringEscapeUtils.escapeXml11("a<b>c\"d'e&f"));
348         assertEquals("XML 1.1 should not escape \t \n \r",
349                 "a\tb\rc\nd", StringEscapeUtils.escapeXml11("a\tb\rc\nd"));
350         assertEquals("XML 1.1 should omit #x0",
351                 "ab", StringEscapeUtils.escapeXml11("a\u0000b"));
352         assertEquals("XML 1.1 should escape #x1-x8 | #xb | #xc | #xe-#x19",
353                 "a&#1;&#8;&#11;&#12;&#14;&#31;b", StringEscapeUtils.escapeXml11("a\u0001\u0008\u000b\u000c\u000e\u001fb"));
354         assertEquals("XML 1.1 should escape #x7F-#x84 | #x86-#x9F",
355                 "a\u007e&#127;&#132;\u0085&#134;&#159;\u00a0b", StringEscapeUtils.escapeXml11("a\u007e\u007f\u0084\u0085\u0086\u009f\u00a0b"));
356         assertEquals("XML 1.1 should omit #xd800-#xdfff",
357                 "a\ud7ff  \ue000b", StringEscapeUtils.escapeXml11("a\ud7ff\ud800 \udfff \ue000b"));
358         assertEquals("XML 1.1 should omit #xfffe | #xffff",
359                 "a\ufffdb", StringEscapeUtils.escapeXml11("a\ufffd\ufffe\uffffb"));
360     }
361 
362     /**
363      * Tests Supplementary characters. 
364      * <p>
365      * From http://www.w3.org/International/questions/qa-escapes
366      * </p>
367      * <blockquote>
368      * Supplementary characters are those Unicode characters that have code points higher than the characters in
369      * the Basic Multilingual Plane (BMP). In UTF-16 a supplementary character is encoded using two 16-bit surrogate code points from the
370      * BMP. Because of this, some people think that supplementary characters need to be represented using two escapes, but this is incorrect
371      * - you must use the single, code point value for that character. For example, use &amp;&#35;x233B4&#59; rather than
372      * &amp;&#35;xD84C&#59;&amp;&#35;xDFB4&#59;.
373      * </blockquote>
374      * @see <a href="http://www.w3.org/International/questions/qa-escapes">Using character escapes in markup and CSS</a>
375      * @see <a href="https://issues.apache.org/jira/browse/LANG-728">LANG-728</a>
376      */
377     @Test
378     @SuppressWarnings( "deprecation" ) // ESCAPE_XML has been replaced by ESCAPE_XML10 and ESCAPE_XML11 in 3.3
379     public void testEscapeXmlSupplementaryCharacters() {
380         final CharSequenceTranslator escapeXml = 
381             StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE) );
382 
383         assertEquals("Supplementary character must be represented using a single escape", "&#144308;",
384                 escapeXml.translate("\uD84C\uDFB4"));
385 
386         assertEquals("Supplementary characters mixed with basic characters should be encoded correctly", "a b c &#144308;",
387                         escapeXml.translate("a b c \uD84C\uDFB4"));
388     }
389     
390     @Test
391     @SuppressWarnings( "deprecation" ) // ESCAPE_XML has been replaced by ESCAPE_XML10 and ESCAPE_XML11 in 3.3
392     public void testEscapeXmlAllCharacters() {
393         // http://www.w3.org/TR/xml/#charsets says:
394         // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] /* any Unicode character,
395         // excluding the surrogate blocks, FFFE, and FFFF. */
396         final CharSequenceTranslator escapeXml = StringEscapeUtils.ESCAPE_XML
397                 .with(NumericEntityEscaper.below(9), NumericEntityEscaper.between(0xB, 0xC), NumericEntityEscaper.between(0xE, 0x19),
398                         NumericEntityEscaper.between(0xD800, 0xDFFF), NumericEntityEscaper.between(0xFFFE, 0xFFFF), NumericEntityEscaper.above(0x110000));
399 
400         assertEquals("&#0;&#1;&#2;&#3;&#4;&#5;&#6;&#7;&#8;", escapeXml.translate("\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008"));
401         assertEquals("\t", escapeXml.translate("\t")); // 0x9
402         assertEquals("\n", escapeXml.translate("\n")); // 0xA
403         assertEquals("&#11;&#12;", escapeXml.translate("\u000B\u000C"));
404         assertEquals("\r", escapeXml.translate("\r")); // 0xD
405         assertEquals("Hello World! Ain&apos;t this great?", escapeXml.translate("Hello World! Ain't this great?"));
406         assertEquals("&#14;&#15;&#24;&#25;", escapeXml.translate("\u000E\u000F\u0018\u0019"));
407     }
408     
409     /**
410      * Reverse of the above.
411      *
412      * @see <a href="https://issues.apache.org/jira/browse/LANG-729">LANG-729</a>
413      */
414     @Test
415     public void testUnescapeXmlSupplementaryCharacters() {
416         assertEquals("Supplementary character must be represented using a single escape", "\uD84C\uDFB4",
417                 StringEscapeUtils.unescapeXml("&#144308;") );
418 
419         assertEquals("Supplementary characters mixed with basic characters should be decoded correctly", "a b c \uD84C\uDFB4",
420                 StringEscapeUtils.unescapeXml("a b c &#144308;") );
421     }
422         
423     // Tests issue #38569
424     // http://issues.apache.org/bugzilla/show_bug.cgi?id=38569
425     @Test
426     public void testStandaloneAmphersand() {
427         assertEquals("<P&O>", StringEscapeUtils.unescapeHtml4("&lt;P&O&gt;"));
428         assertEquals("test & <", StringEscapeUtils.unescapeHtml4("test & &lt;"));
429         assertEquals("<P&O>", StringEscapeUtils.unescapeXml("&lt;P&O&gt;"));
430         assertEquals("test & <", StringEscapeUtils.unescapeXml("test & &lt;"));
431     }
432 
433     @Test
434     public void testLang313() {
435         assertEquals("& &", StringEscapeUtils.unescapeHtml4("& &amp;"));
436     }
437 
438     @Test
439     public void testEscapeCsvString() throws Exception {
440         assertEquals("foo.bar",            StringEscapeUtils.escapeCsv("foo.bar"));
441         assertEquals("\"foo,bar\"",        StringEscapeUtils.escapeCsv("foo,bar"));
442         assertEquals("\"foo\nbar\"",       StringEscapeUtils.escapeCsv("foo\nbar"));
443         assertEquals("\"foo\rbar\"",       StringEscapeUtils.escapeCsv("foo\rbar"));
444         assertEquals("\"foo\"\"bar\"",     StringEscapeUtils.escapeCsv("foo\"bar"));
445         assertEquals("foo\uD84C\uDFB4bar", StringEscapeUtils.escapeCsv("foo\uD84C\uDFB4bar"));
446         assertEquals("",   StringEscapeUtils.escapeCsv(""));
447         assertEquals(null, StringEscapeUtils.escapeCsv(null));
448     }
449 
450     @Test
451     public void testEscapeCsvWriter() throws Exception {
452         checkCsvEscapeWriter("foo.bar",            "foo.bar");
453         checkCsvEscapeWriter("\"foo,bar\"",        "foo,bar");
454         checkCsvEscapeWriter("\"foo\nbar\"",       "foo\nbar");
455         checkCsvEscapeWriter("\"foo\rbar\"",       "foo\rbar");
456         checkCsvEscapeWriter("\"foo\"\"bar\"",     "foo\"bar");
457         checkCsvEscapeWriter("foo\uD84C\uDFB4bar", "foo\uD84C\uDFB4bar");
458         checkCsvEscapeWriter("", null);
459         checkCsvEscapeWriter("", "");
460     }
461 
462     private void checkCsvEscapeWriter(final String expected, final String value) {
463         try {
464             final StringWriter writer = new StringWriter();
465             StringEscapeUtils.ESCAPE_CSV.translate(value, writer);
466             assertEquals(expected, writer.toString());
467         } catch (final IOException e) {
468             fail("Threw: " + e);
469         }
470     }
471 
472     @Test
473     public void testUnescapeCsvString() throws Exception {
474         assertEquals("foo.bar",              StringEscapeUtils.unescapeCsv("foo.bar"));
475         assertEquals("foo,bar",              StringEscapeUtils.unescapeCsv("\"foo,bar\""));
476         assertEquals("foo\nbar",             StringEscapeUtils.unescapeCsv("\"foo\nbar\""));
477         assertEquals("foo\rbar",             StringEscapeUtils.unescapeCsv("\"foo\rbar\""));
478         assertEquals("foo\"bar",             StringEscapeUtils.unescapeCsv("\"foo\"\"bar\""));
479         assertEquals("foo\uD84C\uDFB4bar",   StringEscapeUtils.unescapeCsv("foo\uD84C\uDFB4bar"));
480         assertEquals("",   StringEscapeUtils.unescapeCsv(""));
481         assertEquals(null, StringEscapeUtils.unescapeCsv(null));
482 
483         assertEquals("\"foo.bar\"",          StringEscapeUtils.unescapeCsv("\"foo.bar\""));
484     }
485 
486     @Test
487     public void testUnescapeCsvWriter() throws Exception {
488         checkCsvUnescapeWriter("foo.bar",            "foo.bar");
489         checkCsvUnescapeWriter("foo,bar",            "\"foo,bar\"");
490         checkCsvUnescapeWriter("foo\nbar",           "\"foo\nbar\"");
491         checkCsvUnescapeWriter("foo\rbar",           "\"foo\rbar\"");
492         checkCsvUnescapeWriter("foo\"bar",           "\"foo\"\"bar\"");
493         checkCsvUnescapeWriter("foo\uD84C\uDFB4bar", "foo\uD84C\uDFB4bar");
494         checkCsvUnescapeWriter("", null);
495         checkCsvUnescapeWriter("", "");
496 
497         checkCsvUnescapeWriter("\"foo.bar\"",        "\"foo.bar\"");
498     }
499 
500     private void checkCsvUnescapeWriter(final String expected, final String value) {
501         try {
502             final StringWriter writer = new StringWriter();
503             StringEscapeUtils.UNESCAPE_CSV.translate(value, writer);
504             assertEquals(expected, writer.toString());
505         } catch (final IOException e) {
506             fail("Threw: " + e);
507         }
508     }
509 
510     /**
511      * Tests // https://issues.apache.org/jira/browse/LANG-480
512      */
513     @Test
514     public void testEscapeHtmlHighUnicode() {
515         // this is the utf8 representation of the character:
516         // COUNTING ROD UNIT DIGIT THREE
517         // in Unicode
518         // codepoint: U+1D362
519         final byte[] data = new byte[] { (byte)0xF0, (byte)0x9D, (byte)0x8D, (byte)0xA2 };
520 
521         final String original = new String(data, Charset.forName("UTF8"));
522 
523         final String escaped = StringEscapeUtils.escapeHtml4( original );
524         assertEquals( "High Unicode should not have been escaped", original, escaped);
525 
526         final String unescaped = StringEscapeUtils.unescapeHtml4( escaped );
527         assertEquals( "High Unicode should have been unchanged", original, unescaped);
528 
529 // TODO: I think this should hold, needs further investigation
530 //        String unescapedFromEntity = StringEscapeUtils.unescapeHtml4( "&#119650;" );
531 //        assertEquals( "High Unicode should have been unescaped", original, unescapedFromEntity);
532     }
533 
534     /**
535      * Tests https://issues.apache.org/jira/browse/LANG-339
536      */
537     @Test
538     public void testEscapeHiragana() {
539         // Some random Japanese Unicode characters
540         final String original = "\u304B\u304C\u3068";
541         final String escaped = StringEscapeUtils.escapeHtml4(original);
542         assertEquals( "Hiragana character Unicode behaviour should not be being escaped by escapeHtml4",
543         original, escaped);
544 
545         final String unescaped = StringEscapeUtils.unescapeHtml4( escaped );
546 
547         assertEquals( "Hiragana character Unicode behaviour has changed - expected no unescaping", escaped, unescaped);
548     }
549 
550     /**
551      * Tests https://issues.apache.org/jira/browse/LANG-708
552      * 
553      * @throws IOException
554      *             if an I/O error occurs
555      */
556     @Test
557     public void testLang708() throws IOException {
558         final FileInputStream fis = new FileInputStream("src/test/resources/lang-708-input.txt");
559         final String input = IOUtils.toString(fis, "UTF-8");
560         final String escaped = StringEscapeUtils.escapeEcmaScript(input);
561         // just the end:
562         assertTrue(escaped, escaped.endsWith("}]"));
563         // a little more:
564         assertTrue(escaped, escaped.endsWith("\"valueCode\\\":\\\"\\\"}]"));
565         fis.close();
566     }
567 
568     /**
569      * Tests https://issues.apache.org/jira/browse/LANG-720
570      */
571     @Test
572     @SuppressWarnings( "deprecation" ) // escapeXml(String) has been replaced by escapeXml10(String) and escapeXml11(String) in 3.3
573     public void testLang720() {
574         final String input = new StringBuilder("\ud842\udfb7").append("A").toString();
575         final String escaped = StringEscapeUtils.escapeXml(input);
576         assertEquals(input, escaped);
577     }
578 
579     /**
580      * Tests https://issues.apache.org/jira/browse/LANG-911
581      */
582     @Test
583     public void testLang911() {
584         final String bellsTest = "\ud83d\udc80\ud83d\udd14";
585         final String value = StringEscapeUtils.escapeJava(bellsTest);
586         final String valueTest = StringEscapeUtils.unescapeJava(value);
587         assertEquals(bellsTest, valueTest);
588     }
589 
590     @Test
591     public void testEscapeJson() {
592         assertEquals(null, StringEscapeUtils.escapeJson(null));
593         try {
594             StringEscapeUtils.ESCAPE_JSON.translate(null, null);
595             fail();
596         } catch (final IOException ex) {
597             fail();
598         } catch (final IllegalArgumentException ex) {
599         }
600         try {
601             StringEscapeUtils.ESCAPE_JSON.translate("", null);
602             fail();
603         } catch (final IOException ex) {
604             fail();
605         } catch (final IllegalArgumentException ex) {
606         }
607 
608         assertEquals("He didn't say, \\\"stop!\\\"", StringEscapeUtils.escapeJson("He didn't say, \"stop!\""));
609 
610         final String expected = "\\\"foo\\\" isn't \\\"bar\\\". specials: \\b\\r\\n\\f\\t\\\\\\/";
611         final String input ="\"foo\" isn't \"bar\". specials: \b\r\n\f\t\\/";
612 
613         assertEquals(expected, StringEscapeUtils.escapeJson(input));
614     }
615 
616 }