View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.output;
18  
19  import static org.junit.jupiter.api.Assertions.assertArrayEquals;
20  import static org.junit.jupiter.api.Assertions.assertEquals;
21  import static org.junit.jupiter.api.Assertions.assertTrue;
22  
23  import java.io.ByteArrayOutputStream;
24  import java.io.IOException;
25  import java.nio.charset.Charset;
26  import java.nio.charset.StandardCharsets;
27  
28  import org.junit.jupiter.api.Test;
29  import org.junitpioneer.jupiter.DefaultLocale;
30  
31  /**
32   * Tests {@link XmlStreamWriter}.
33   */
34  public class XmlStreamWriterTest {
35  
36      /** French */
37      private static final String TEXT_LATIN1 = "eacute: \u00E9";
38  
39      /** Greek */
40      private static final String TEXT_LATIN7 = "alpha: \u03B1";
41  
42      /** Euro support */
43      private static final String TEXT_LATIN15 = "euro: \u20AC";
44  
45      /** Japanese */
46      private static final String TEXT_EUC_JP = "hiragana A: \u3042";
47  
48      /** Unicode: support everything */
49      private static final String TEXT_UNICODE = TEXT_LATIN1 + ", " + TEXT_LATIN7
50              + ", " + TEXT_LATIN15 + ", " + TEXT_EUC_JP;
51  
52      @SuppressWarnings("resource")
53      private static void checkXmlContent(final String xml, final String encodingName, final String defaultEncodingName)
54              throws IOException {
55          final ByteArrayOutputStream out = new ByteArrayOutputStream();
56          final XmlStreamWriter writerCheck;
57          try (final XmlStreamWriter writer = XmlStreamWriter.builder().setOutputStream(out).setCharset(defaultEncodingName).get()) {
58              writerCheck = writer;
59              writer.write(xml);
60          }
61          final byte[] xmlContent = out.toByteArray();
62          final Charset charset = Charset.forName(encodingName);
63          final Charset writerCharset = Charset.forName(writerCheck.getEncoding());
64          assertEquals(charset, writerCharset);
65          assertTrue(writerCharset.contains(charset), writerCharset.name());
66          assertArrayEquals(xml.getBytes(encodingName), xmlContent);
67      }
68  
69      private static void checkXmlWriter(final String text, final String encoding)
70              throws IOException {
71          checkXmlWriter(text, encoding, null);
72      }
73  
74      private static void checkXmlWriter(final String text, final String encoding, final String defaultEncoding)
75              throws IOException {
76          final String xml = createXmlContent(text, encoding);
77          String effectiveEncoding = encoding;
78          if (effectiveEncoding == null) {
79              effectiveEncoding = defaultEncoding == null ? StandardCharsets.UTF_8.name() : defaultEncoding;
80          }
81          checkXmlContent(xml, effectiveEncoding, defaultEncoding);
82      }
83  
84      private static String createXmlContent(final String text, final String encoding) {
85          String xmlDecl = "<?xml version=\"1.0\"?>";
86          if (encoding != null) {
87              xmlDecl = "<?xml version=\"1.0\" encoding=\"" + encoding + "\"?>";
88          }
89          return xmlDecl + "\n<text>" + text + "</text>";
90      }
91  
92      @Test
93      public void testDefaultEncoding() throws IOException {
94          checkXmlWriter(TEXT_UNICODE, null, null);
95          checkXmlWriter(TEXT_UNICODE, null, StandardCharsets.UTF_8.name());
96          checkXmlWriter(TEXT_UNICODE, null, StandardCharsets.UTF_16.name());
97          checkXmlWriter(TEXT_UNICODE, null, StandardCharsets.UTF_16BE.name());
98          checkXmlWriter(TEXT_UNICODE, null, StandardCharsets.ISO_8859_1.name());
99      }
100 
101     @Test
102     public void testEBCDICEncoding() throws IOException {
103         checkXmlWriter("simple text in EBCDIC", "CP1047");
104     }
105 
106     @Test
107     public void testEmpty() throws IOException {
108         try (final ByteArrayOutputStream out = new ByteArrayOutputStream();
109                 XmlStreamWriter writer = new XmlStreamWriter(out)) {
110             writer.flush();
111             writer.write("");
112             writer.flush();
113             writer.write(".");
114             writer.flush();
115         }
116         try (final ByteArrayOutputStream out = new ByteArrayOutputStream();
117                 XmlStreamWriter writer = XmlStreamWriter.builder().setOutputStream(out).get()) {
118             writer.flush();
119             writer.write("");
120             writer.flush();
121             writer.write(".");
122             writer.flush();
123         }
124     }
125 
126     @Test
127     public void testEUC_JPEncoding() throws IOException {
128         checkXmlWriter(TEXT_EUC_JP, "EUC-JP");
129     }
130 
131     @Test
132     public void testLatin15Encoding() throws IOException {
133         checkXmlWriter(TEXT_LATIN15, "ISO-8859-15");
134     }
135 
136     @Test
137     public void testLatin1Encoding() throws IOException {
138         checkXmlWriter(TEXT_LATIN1, StandardCharsets.ISO_8859_1.name());
139     }
140 
141     @Test
142     public void testLatin7Encoding() throws IOException {
143         checkXmlWriter(TEXT_LATIN7, "ISO-8859-7");
144     }
145 
146     /** Turkish language has specific rules to convert dotted and dotless i character. */
147     @Test
148     @DefaultLocale(language = "tr")
149     public void testLowerCaseEncodingWithTurkishLocale_IO_557() throws IOException {
150         checkXmlWriter(TEXT_UNICODE, "utf-8");
151         checkXmlWriter(TEXT_LATIN1, "iso-8859-1");
152         checkXmlWriter(TEXT_LATIN7, "iso-8859-7");
153     }
154 
155     @Test
156     public void testNoXmlHeader() throws IOException {
157         checkXmlContent("<text>text with no XML header</text>", StandardCharsets.UTF_8.name(), null);
158     }
159 
160     @Test
161     public void testUTF16BEEncoding() throws IOException {
162         checkXmlWriter(TEXT_UNICODE, StandardCharsets.UTF_16BE.name());
163     }
164 
165     @Test
166     public void testUTF16Encoding() throws IOException {
167         checkXmlWriter(TEXT_UNICODE, StandardCharsets.UTF_16.name());
168     }
169 
170     @Test
171     public void testUTF16LEEncoding() throws IOException {
172         checkXmlWriter(TEXT_UNICODE, StandardCharsets.UTF_16LE.name());
173     }
174 
175     @Test
176     public void testUTF8Encoding() throws IOException {
177         checkXmlWriter(TEXT_UNICODE, StandardCharsets.UTF_8.name());
178     }
179 }