View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.input;
18  
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;

import java.io.IOException;

import org.junit.jupiter.api.Test;
27  
28  /**
29   * Test the Encoding Utilities part of {@link XmlStreamReader}.
30   */
31  public class XmlStreamReaderUtilitiesTest {
32  
33      /** Mock {@link XmlStreamReader} implementation */
34      private static final class MockXmlStreamReader extends XmlStreamReader {
35          MockXmlStreamReader(final String defaultEncoding) throws IOException {
36              super(CharSequenceInputStream.builder().setCharSequence("").get(), null, true, defaultEncoding);
37          }
38      }
39      private static final String RAWMGS1 = "encoding mismatch";
40      private static final String RAWMGS2 = "unknown BOM";
41      private static final String HTTPMGS1 = "BOM must be null";
42      private static final String HTTPMGS2 = "encoding mismatch";
43  
44      private static final String HTTPMGS3 = "Illegal MIME";
45      private static final String APPXML         = "application/xml";
46      private static final String APPXML_UTF8    = "application/xml;charset=UTF-8";
47      private static final String APPXML_UTF16   = "application/xml;charset=UTF-16";
48      private static final String APPXML_UTF32   = "application/xml;charset=UTF-32";
49      private static final String APPXML_UTF16BE = "application/xml;charset=UTF-16BE";
50      private static final String APPXML_UTF16LE = "application/xml;charset=UTF-16LE";
51      private static final String APPXML_UTF32BE = "application/xml;charset=UTF-32BE";
52      private static final String APPXML_UTF32LE = "application/xml;charset=UTF-32LE";
53  
54      private static final String TXTXML = "text/xml";
55  
56      protected String calculateHttpEncoding(final String httpContentType, final String bomEnc, final String xmlGuessEnc,
57          final String xmlEnc, final boolean lenient, final String defaultEncoding) throws IOException {
58          try (MockXmlStreamReader mock = new MockXmlStreamReader(defaultEncoding)) {
59              return mock.calculateHttpEncoding(bomEnc, xmlGuessEnc, xmlEnc, lenient, httpContentType);
60          }
61      }
62  
63      protected String calculateRawEncoding(final String bomEnc, final String xmlGuessEnc, final String xmlEnc,
64          final String defaultEncoding) throws IOException {
65          try (MockXmlStreamReader mock = new MockXmlStreamReader(defaultEncoding)) {
66              return mock.calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc);
67          }
68      }
69  
70      @SuppressWarnings("boxing")
71      private void checkAppXml(final boolean expected, final String mime) {
72          assertEquals(expected, XmlStreamReader.isAppXml(mime), "Mime=[" + mime + "]");
73      }
74  
75      private void checkContentTypeEncoding(final String expected, final String httpContentType) {
76          assertEquals(expected, XmlStreamReader.getContentTypeEncoding(httpContentType), "ContentTypeEncoding=[" + httpContentType + "]");
77      }
78  
79      private void checkContentTypeMime(final String expected, final String httpContentType) {
80          assertEquals(expected, XmlStreamReader.getContentTypeMime(httpContentType), "ContentTypeMime=[" + httpContentType + "]");
81      }
82  
83      private void checkHttpEncoding(final String expected, final boolean lenient, final String httpContentType,
84              final String bomEnc, final String xmlGuessEnc, final String xmlEnc, final String defaultEncoding) throws IOException {
85          final StringBuilder builder = new StringBuilder();
86          builder.append("HttpEncoding=[").append(bomEnc).append("], ");
87          builder.append("lenient=[").append(lenient).append("], ");
88          builder.append("httpContentType=[").append(httpContentType).append("], ");
89          builder.append("bomEnc=[").append(bomEnc).append("], ");
90          builder.append("xmlGuessEnc=[").append(xmlGuessEnc).append("], ");
91          builder.append("xmlEnc=[").append(xmlEnc).append("], ");
92          builder.append("defaultEncoding=[").append(defaultEncoding).append("],");
93          final String encoding = calculateHttpEncoding(httpContentType, bomEnc, xmlGuessEnc, xmlEnc, lenient, defaultEncoding);
94          assertEquals(expected, encoding, builder.toString());
95      }
96  
97      private void checkHttpError(final String msgSuffix, final boolean lenient, final String httpContentType,
98              final String bomEnc, final String xmlGuessEnc, final String xmlEnc, final String defaultEncoding) {
99          try {
100             checkHttpEncoding("XmlStreamReaderException", lenient, httpContentType, bomEnc, xmlGuessEnc, xmlEnc, defaultEncoding);
101             fail("Expected XmlStreamReaderException");
102         } catch (final XmlStreamReaderException e) {
103             assertTrue(e.getMessage().startsWith("Illegal encoding"), "Msg Start: " + e.getMessage());
104             assertTrue(e.getMessage().endsWith(msgSuffix), "Msg End: " + e.getMessage());
105             assertEquals(bomEnc, e.getBomEncoding(), "bomEnc");
106             assertEquals(xmlGuessEnc, e.getXmlGuessEncoding(), "xmlGuessEnc");
107             assertEquals(xmlEnc, e.getXmlEncoding(), "xmlEnc");
108             assertEquals(XmlStreamReader.getContentTypeEncoding(httpContentType), e.getContentTypeEncoding(),
109                     "ContentTypeEncoding");
110             assertEquals(XmlStreamReader.getContentTypeMime(httpContentType), e.getContentTypeMime(), "ContentTypeMime");
111         } catch (final Exception e) {
112             fail("Expected XmlStreamReaderException, but threw " + e);
113         }
114     }
115 
116     private void checkRawEncoding(final String expected,
117             final String bomEnc, final String xmlGuessEnc, final String xmlEnc, final String defaultEncoding) throws IOException {
118         final StringBuilder builder = new StringBuilder();
119         builder.append("RawEncoding: ").append(bomEnc).append("], ");
120         builder.append("bomEnc=[").append(bomEnc).append("], ");
121         builder.append("xmlGuessEnc=[").append(xmlGuessEnc).append("], ");
122         builder.append("xmlEnc=[").append(xmlEnc).append("], ");
123         builder.append("defaultEncoding=[").append(defaultEncoding).append("],");
124         final String encoding = calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc, defaultEncoding);
125         assertEquals(expected, encoding, builder.toString());
126     }
127 
128     private void checkRawError(final String msgSuffix,
129             final String bomEnc, final String xmlGuessEnc, final String xmlEnc, final String defaultEncoding) {
130         try {
131             checkRawEncoding("XmlStreamReaderException", bomEnc, xmlGuessEnc, xmlEnc, defaultEncoding);
132             fail("Expected XmlStreamReaderException");
133         } catch (final XmlStreamReaderException e) {
134             assertTrue(e.getMessage().startsWith("Illegal encoding"), "Msg Start: " + e.getMessage());
135             assertTrue(e.getMessage().endsWith(msgSuffix), "Msg End: "   + e.getMessage());
136             assertEquals(bomEnc, e.getBomEncoding(), "bomEnc");
137             assertEquals(xmlGuessEnc, e.getXmlGuessEncoding(), "xmlGuessEnc");
138             assertEquals(xmlEnc, e.getXmlEncoding(), "xmlEnc");
139             assertNull(e.getContentTypeEncoding(), "ContentTypeEncoding");
140             assertNull(e.getContentTypeMime(), "ContentTypeMime");
141         } catch (final Exception e) {
142             fail("Expected XmlStreamReaderException, but threw " + e);
143         }
144     }
145 
146     @SuppressWarnings("boxing")
147     private void checkTextXml(final boolean expected, final String mime) {
148         assertEquals(expected, XmlStreamReader.isTextXml(mime), "Mime=[" + mime + "]");
149     }
150 
151     @Test
152     public void testAppXml() {
153         checkAppXml(false, null);
154         checkAppXml(false, "");
155         checkAppXml(true,  "application/xml");
156         checkAppXml(true,  "application/xml-dtd");
157         checkAppXml(true,  "application/xml-external-parsed-entity");
158         checkAppXml(true,  "application/soap+xml");
159         checkAppXml(true,  "application/atom+xml");
160         checkAppXml(false, "application/atomxml");
161         checkAppXml(false, "text/xml");
162         checkAppXml(false, "text/atom+xml");
163         checkAppXml(true,  "application/xml-dtd");
164         checkAppXml(true,  "application/xml-external-parsed-entity");
165     }
166 
167     @Test
168     public void testCalculateHttpEncoding() throws IOException {
169         // No BOM        Expected     Lenient cType           BOM         Guess       XML         Default
170         checkHttpError(HTTPMGS3,      true,   null,           null,       null,       null,       null);
171         checkHttpError(HTTPMGS3,      false,  null,           null,       null,       "UTF-8",    null);
172         checkHttpEncoding("UTF-8",    true,   null,           null,       null,       "UTF-8",    null);
173         checkHttpEncoding("UTF-16LE", true,   null,           null,       null,       "UTF-16LE", null);
174         checkHttpError(HTTPMGS3,      false,  "text/css",     null,       null,       null,       null);
175         checkHttpEncoding("US-ASCII", false,  TXTXML,         null,       null,       null,       null);
176         checkHttpEncoding("UTF-16BE", false,  TXTXML,         null,       null,       null,       "UTF-16BE");
177         checkHttpEncoding("UTF-8",    false,  APPXML,         null,       null,       null,       null);
178         checkHttpEncoding("UTF-16BE", false,  APPXML,         null,       null,       null,       "UTF-16BE");
179         checkHttpEncoding("UTF-8",    false,  APPXML,         "UTF-8",    null,       null,       "UTF-16BE");
180         checkHttpEncoding("UTF-16LE", false,  APPXML_UTF16LE, null,       null,       null,       null);
181         checkHttpEncoding("UTF-16BE", false,  APPXML_UTF16BE, null,       null,       null,       null);
182         checkHttpError(HTTPMGS1,      false,  APPXML_UTF16LE, "UTF-16LE", null,       null,       null);
183         checkHttpError(HTTPMGS1,      false,  APPXML_UTF16BE, "UTF-16BE", null,       null,       null);
184         checkHttpError(HTTPMGS2,      false,  APPXML_UTF16,   null,       null,       null,       null);
185         checkHttpError(HTTPMGS2,      false,  APPXML_UTF16,   "UTF-8",    null,       null,       null);
186         checkHttpEncoding("UTF-16LE", false,  APPXML_UTF16,   "UTF-16LE", null,       null,       null);
187         checkHttpEncoding("UTF-16BE", false,  APPXML_UTF16,   "UTF-16BE", null,       null,       null);
188         checkHttpEncoding("UTF-8",    false,  APPXML_UTF8,    null,       null,       null,       null);
189         checkHttpEncoding("UTF-8",    false,  APPXML_UTF8,    "UTF-16BE", "UTF-16BE", "UTF-16BE", "UTF-16BE");
190     }
191 
192     @Test
193     public void testCalculateHttpEncodingUtf32() throws IOException {
194         // No BOM        Expected     Lenient cType           BOM         Guess       XML         Default
195         checkHttpEncoding("UTF-32LE", true,   null,           null,       null,       "UTF-32LE", null);
196         checkHttpEncoding("UTF-32BE", false,  TXTXML,         null,       null,       null,       "UTF-32BE");
197         checkHttpEncoding("UTF-32BE", false,  APPXML,         null,       null,       null,       "UTF-32BE");
198         checkHttpEncoding("UTF-32LE", false,  APPXML_UTF32LE, null,       null,       null,       null);
199         checkHttpEncoding("UTF-32BE", false,  APPXML_UTF32BE, null,       null,       null,       null);
200         checkHttpError(HTTPMGS1,      false,  APPXML_UTF32LE, "UTF-32LE", null,       null,       null);
201         checkHttpError(HTTPMGS1,      false,  APPXML_UTF32BE, "UTF-32BE", null,       null,       null);
202         checkHttpError(HTTPMGS2,      false,  APPXML_UTF32,   null,       null,       null,       null);
203         checkHttpError(HTTPMGS2,      false,  APPXML_UTF32,   "UTF-8",    null,       null,       null);
204         checkHttpEncoding("UTF-32LE", false,  APPXML_UTF32,   "UTF-32LE", null,       null,       null);
205         checkHttpEncoding("UTF-32BE", false,  APPXML_UTF32,   "UTF-32BE", null,       null,       null);
206         checkHttpEncoding("UTF-8",    false,  APPXML_UTF8,    "UTF-32BE", "UTF-32BE", "UTF-32BE", "UTF-32BE");
207     }
208 
209     @Test
210     public void testCalculateRawEncodingAdditionalUTF16() throws IOException {
211         //                           BOM         Guess       XML         Default
212         checkRawError(RAWMGS1,       "UTF-16BE", "UTF-16",   null,       null);
213         checkRawEncoding("UTF-16BE", "UTF-16BE", null,       "UTF-16",   null);
214         checkRawEncoding("UTF-16BE", "UTF-16BE", "UTF-16BE", "UTF-16",   null);
215         checkRawError(RAWMGS1,       "UTF-16BE", null,       "UTF-16LE", null);
216         checkRawError(RAWMGS1,       "UTF-16BE", "UTF-16BE", "UTF-16LE", null);
217         checkRawError(RAWMGS1,       "UTF-16LE", "UTF-16",   null,       null);
218         checkRawEncoding("UTF-16LE", "UTF-16LE", null,       "UTF-16",   null);
219         checkRawEncoding("UTF-16LE", "UTF-16LE", "UTF-16LE", "UTF-16",   null);
220         checkRawError(RAWMGS1,       "UTF-16LE", null,       "UTF-16BE", null);
221         checkRawError(RAWMGS1,       "UTF-16LE", "UTF-16LE", "UTF-16BE", null);
222     }
223 
224     @Test
225     public void testCalculateRawEncodingAdditionalUTF32() throws IOException {
226         //                           BOM         Guess       XML         Default
227         checkRawError(RAWMGS1,       "UTF-32BE", "UTF-32",   null,       null);
228         checkRawEncoding("UTF-32BE", "UTF-32BE", null,       "UTF-32",   null);
229         checkRawEncoding("UTF-32BE", "UTF-32BE", "UTF-32BE", "UTF-32",   null);
230         checkRawError(RAWMGS1,       "UTF-32BE", null,       "UTF-32LE", null);
231         checkRawError(RAWMGS1,       "UTF-32BE", "UTF-32BE", "UTF-32LE", null);
232         checkRawError(RAWMGS1,       "UTF-32LE", "UTF-32",   null,       null);
233         checkRawEncoding("UTF-32LE", "UTF-32LE", null,       "UTF-32",   null);
234         checkRawEncoding("UTF-32LE", "UTF-32LE", "UTF-32LE", "UTF-32",   null);
235         checkRawError(RAWMGS1,       "UTF-32LE", null,       "UTF-32BE", null);
236         checkRawError(RAWMGS1,       "UTF-32LE", "UTF-32LE", "UTF-32BE", null);
237     }
238 
239     @Test
240     public void testCalculateRawEncodingNoBOM() throws IOException {
241         // No BOM        Expected    BOM         Guess       XML         Default
242         checkRawError(RAWMGS2,       "UTF-32",   null,       null,       null);
243         //
244         checkRawEncoding("UTF-8",    null,       null,       null,       null);
245         checkRawEncoding("UTF-8",    null,       "UTF-16BE", null,       null); /* why default & not Guess? */
246         checkRawEncoding("UTF-8",    null,       null,       "UTF-16BE", null); /* why default & not XMLEnc? */
247         checkRawEncoding("UTF-8",    null,       "UTF-8",    "UTF-8",    "UTF-16BE");
248         //
249         checkRawEncoding("UTF-16BE", null,       "UTF-16BE", "UTF-16BE", null);
250         checkRawEncoding("UTF-16BE", null,       null,       null,       "UTF-16BE");
251         checkRawEncoding("UTF-16BE", null,       "UTF-8",    null,       "UTF-16BE"); /* why default & not Guess? */
252         checkRawEncoding("UTF-16BE", null,       null,       "UTF-8",    "UTF-16BE"); /* why default & not Guess? */
253         checkRawEncoding("UTF-16BE", null,       "UTF-16BE", "UTF-16",   null);
254         checkRawEncoding("UTF-16LE", null,       "UTF-16LE", "UTF-16",   null);
255     }
256 
257     @Test
258     public void testCalculateRawEncodingStandard() throws IOException {
259         // Standard BOM Checks           BOM         Other       Default
260         testCalculateRawEncodingStandard("UTF-8",    "UTF-16BE", "UTF-16LE");
261         testCalculateRawEncodingStandard("UTF-16BE", "UTF-8",    "UTF-16LE");
262         testCalculateRawEncodingStandard("UTF-16LE", "UTF-8",    "UTF-16BE");
263     }
264 
265     private void testCalculateRawEncodingStandard(final String bomEnc, final String otherEnc, final String defaultEnc) throws IOException {
266         //               Expected   BOM        Guess     XMLEnc    Default
267         checkRawEncoding(bomEnc,    bomEnc,    null,     null,     defaultEnc);
268         checkRawEncoding(bomEnc,    bomEnc,    bomEnc,   null,     defaultEnc);
269         checkRawError(RAWMGS1,      bomEnc,    otherEnc, null,     defaultEnc);
270         checkRawEncoding(bomEnc,    bomEnc,    null,     bomEnc,   defaultEnc);
271         checkRawError(RAWMGS1,      bomEnc,    null,     otherEnc, defaultEnc);
272         checkRawEncoding(bomEnc,    bomEnc,    bomEnc,   bomEnc,   defaultEnc);
273         checkRawError(RAWMGS1,      bomEnc,    bomEnc,   otherEnc, defaultEnc);
274         checkRawError(RAWMGS1,      bomEnc,    otherEnc, bomEnc,   defaultEnc);
275 
276     }
277 
278     @Test
279     public void testCalculateRawEncodingStandardUtf32() throws IOException {
280         // Standard BOM Checks           BOM         Other       Default
281         testCalculateRawEncodingStandard("UTF-8",    "UTF-32BE", "UTF-32LE");
282         testCalculateRawEncodingStandard("UTF-32BE", "UTF-8",    "UTF-32LE");
283         testCalculateRawEncodingStandard("UTF-32LE", "UTF-8",    "UTF-32BE");
284 }
285 
286     @Test
287     public void testContentTypeEncoding() {
288         checkContentTypeEncoding(null, null);
289         checkContentTypeEncoding(null, "");
290         checkContentTypeEncoding(null, "application/xml");
291         checkContentTypeEncoding(null, "application/xml;");
292         checkContentTypeEncoding(null, "multipart/mixed;boundary=frontier");
293         checkContentTypeEncoding(null, "multipart/mixed;boundary='frontier'");
294         checkContentTypeEncoding(null, "multipart/mixed;boundary=\"frontier\"");
295         checkContentTypeEncoding("UTF-16", "application/xml;charset=utf-16");
296         checkContentTypeEncoding("UTF-16", "application/xml;charset=UTF-16");
297         checkContentTypeEncoding("UTF-16", "application/xml;charset='UTF-16'");
298         checkContentTypeEncoding("UTF-16", "application/xml;charset=\"UTF-16\"");
299         checkContentTypeEncoding("UTF-32", "application/xml;charset=utf-32");
300         checkContentTypeEncoding("UTF-32", "application/xml;charset=UTF-32");
301         checkContentTypeEncoding("UTF-32", "application/xml;charset='UTF-32'");
302         checkContentTypeEncoding("UTF-32", "application/xml;charset=\"UTF-32\"");
303     }
304 
305     @Test
306     public void testContentTypeMime() {
307         checkContentTypeMime(null, null);
308         checkContentTypeMime("", "");
309         checkContentTypeMime("application/xml", "application/xml");
310         checkContentTypeMime("application/xml", "application/xml;");
311         checkContentTypeMime("application/xml", "application/xml;charset=utf-16");
312         checkContentTypeMime("application/xml", "application/xml;charset=utf-32");
313     }
314 
315     @Test
316     public void testTextXml() {
317         checkTextXml(false, null);
318         checkTextXml(false, "");
319         checkTextXml(true,  "text/xml");
320         checkTextXml(true,  "text/xml-external-parsed-entity");
321         checkTextXml(true,  "text/soap+xml");
322         checkTextXml(true,  "text/atom+xml");
323         checkTextXml(false, "text/atomxml");
324         checkTextXml(false, "application/xml");
325         checkTextXml(false, "application/atom+xml");
326     }
327 }