001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * https://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.xml; 019 020import javax.xml.XMLConstants; 021import javax.xml.parsers.DocumentBuilderFactory; 022import javax.xml.parsers.ParserConfigurationException; 023import javax.xml.parsers.SAXParserFactory; 024import javax.xml.stream.XMLInputFactory; 025import javax.xml.transform.Source; 026import javax.xml.transform.TransformerConfigurationException; 027import javax.xml.transform.TransformerFactory; 028import javax.xml.transform.sax.SAXSource; 029import javax.xml.transform.stream.StreamSource; 030import javax.xml.validation.SchemaFactory; 031import javax.xml.xpath.XPathFactory; 032 033import org.xml.sax.InputSource; 034import org.xml.sax.SAXException; 035import org.xml.sax.XMLReader; 036 037/** 038 * Entry point for obtaining hardened JAXP factories. 039 * 040 * <p>Every method on this class returns a <em>fresh, hardened</em> factory instance. No caching or pooling is performed; callers on a hot path are responsible 041 * for their own caching.</p> 042 * 043 * <h2>Hardening guarantees</h2> 044 * 045 * <p>Every factory returned by this class makes the same three guarantees, regardless of which JAXP implementation is on the classpath:</p> 046 * 047 * <ul> 048 * <li><strong>External DTDs are not fetched.</strong></li> 049 * <li><strong>External entities are not resolved.</strong></li> 050 * <li><strong>Internal entity expansion is bounded</strong> by the JDK's default limit, so DoS payloads such as Billion Laughs are rejected before they 051 * exhaust resources.</li> 052 * </ul> 053 * 054 * <p>The guarantees hold whether or not the caller opts into DTD validation 055 * ({@link javax.xml.parsers.DocumentBuilderFactory#setValidating(boolean) setValidating(true)}) or attaches a compiled XSD via 056 * {@link javax.xml.parsers.DocumentBuilderFactory#setSchema(javax.xml.validation.Schema) setSchema}: every external resource the validation would otherwise 057 * fetch (the DTD itself, an {@code xsi:schemaLocation} hint, an external entity referenced from the DTD) remains blocked.</p> 058 * 059 * <p>Each method on this class adds factory-specific guarantees on top of the three above, documented on the corresponding {@code newXxxFactory()} method.</p> 060 * 061 * <h2>Caller-supplied URIs</h2> 062 * 063 * <p>A top-level URI passed directly by the caller is fetched as-is: {@code StreamSource(systemId)}, {@code DocumentBuilder.parse(String)}, or a 064 * {@code SAXSource} built from a system id all cause the JAXP implementation to open that URI without consulting the hardening layer. Use a 065 * {@link javax.xml.transform.URIResolver} or {@link org.xml.sax.EntityResolver} if you need to restrict the top-level fetch.</p> 066 * 067 * <h2>Thread safety</h2> 068 * 069 * <p>The returned factories inherit the thread-safety properties of the underlying JAXP implementation, which in practice means they are <strong>not 070 * guaranteed to be thread-safe</strong>. Create a new factory per thread or synchronise externally.</p> 071 * 072 * <p>This class itself is thread-safe: all methods are static and stateless.</p> 073 */ 074public final class XmlFactories { 075 076 private static SAXParserFactory dispatch(final SAXParserFactory factory) { 077 switch (factory.getClass().getName()) { 078 case "com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl": 079 return StockJdkProvider.configure(factory); 080 case "org.apache.harmony.xml.parsers.SAXParserFactoryImpl": 081 return AndroidProvider.configure(factory); 082 case "org.apache.xerces.jaxp.SAXParserFactoryImpl": 083 return XercesProvider.configure(factory); 084 default: 085 throw noProvider(factory); 086 } 087 } 088 089 private static XMLInputFactory dispatch(final XMLInputFactory factory) { 090 switch (factory.getClass().getName()) { 091 case "com.sun.xml.internal.stream.XMLInputFactoryImpl": 092 return StockJdkProvider.configure(factory); 093 case "com.ctc.wstx.stax.WstxInputFactory": 094 return WoodstoxProvider.configure(factory); 095 default: 096 throw noProvider(factory); 097 } 098 } 099 100 private static TransformerFactory dispatch(final TransformerFactory factory) { 101 switch (factory.getClass().getName()) { 102 case "com.sun.org.apache.xalan.internal.xsltc.trax.TransformerFactoryImpl": 103 return StockJdkProvider.configure(factory); 104 case "org.apache.xalan.processor.TransformerFactoryImpl": 105 case "org.apache.xalan.xsltc.trax.TransformerFactoryImpl": 106 return XalanProvider.configure(factory); 107 case "net.sf.saxon.TransformerFactoryImpl": 108 case "com.saxonica.config.ProfessionalTransformerFactory": 109 case "com.saxonica.config.EnterpriseTransformerFactory": 110 return SaxonProvider.configure(factory); 111 default: 112 throw noProvider(factory); 113 } 114 } 115 116 private static XPathFactory dispatch(final XPathFactory factory) { 117 switch (factory.getClass().getName()) { 118 case "com.sun.org.apache.xpath.internal.jaxp.XPathFactoryImpl": 119 return StockJdkProvider.configure(factory); 120 case "org.apache.xpath.jaxp.XPathFactoryImpl": 121 return XalanProvider.configure(factory); 122 case "net.sf.saxon.xpath.XPathFactoryImpl": 123 return SaxonProvider.configure(factory); 124 default: 125 throw noProvider(factory); 126 } 127 } 128 129 private static SchemaFactory dispatch(final SchemaFactory factory) { 130 switch (factory.getClass().getName()) { 131 case "com.sun.org.apache.xerces.internal.jaxp.validation.XMLSchemaFactory": 132 return StockJdkProvider.configure(factory); 133 case "org.apache.xerces.jaxp.validation.XMLSchemaFactory": 134 return XercesProvider.configure(factory); 135 default: 136 throw noProvider(factory); 137 } 138 } 139 140 /** 141 * Rewrites a {@link Source} so that any SAX parsing it triggers runs through an {@link XmlFactories}-hardened {@link XMLReader}. 142 * 143 * <p>Only {@link StreamSource} and {@link SAXSource} without a reader are enriched with a hardened reader. Other kinds of sources are returned as-is.</p> 144 * 145 * @param source the source to harden; never {@code null}. 146 * @return a hardened source. 147 * @throws TransformerConfigurationException if a hardened reader cannot be obtained. 148 */ 149 public static Source harden(final Source source) throws TransformerConfigurationException { 150 if (source instanceof StreamSource || source instanceof SAXSource && ((SAXSource) source).getXMLReader() == null) { 151 try { 152 final XMLReader reader = newSAXParserFactory().newSAXParser().getXMLReader(); 153 final InputSource inputSource = SAXSource.sourceToInputSource(source); 154 return inputSource == null ? source : new SAXSource(reader, inputSource); 155 } catch (final ParserConfigurationException | SAXException e) { 156 throw new TransformerConfigurationException("Failed to obtain a hardened XMLReader for source parsing", e); 157 } 158 } 159 return source; 160 } 161 162 /** 163 * Hardens an existing {@link XMLReader}. 164 * 165 * @param reader the reader to harden; never {@code null}. 166 * @return a hardened reader. 167 * @throws IllegalStateException if the reader's concrete class is not recognized by any bundled hardening recipe, or if the matching recipe cannot apply 168 * its settings to it. 169 */ 170 public static XMLReader harden(final XMLReader reader) { 171 switch (reader.getClass().getName()) { 172 case "com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser": 173 return StockJdkProvider.configure(reader); 174 case "org.apache.harmony.xml.ExpatReader": 175 case "org.apache.commons.xml.AndroidProvider$GuardedXMLReader": 176 return AndroidProvider.configure(reader); 177 case "org.apache.xerces.jaxp.SAXParserImpl$JAXPSAXParser": 178 return XercesProvider.configure(reader); 179 default: 180 throw noProvider(reader); 181 } 182 } 183 184 /** 185 * Returns a fresh, hardened {@link DocumentBuilderFactory}. 186 * 187 * <p><strong>Enabling XInclude:</strong> {@link DocumentBuilderFactory#setXIncludeAware(boolean) setXIncludeAware(true)} on its own does not make XInclude 188 * usable, because an included resource is fetched like any other external resource and is therefore blocked, failing the parse. A caller that genuinely 189 * wants XInclude must, in addition to enabling awareness, install a custom {@link org.xml.sax.EntityResolver} that permits those specific lookups.</p> 190 * 191 * @return a hardened factory. 192 * @throws IllegalStateException if a required hardening setting cannot be applied to the underlying implementation. 193 */ 194 public static DocumentBuilderFactory newDocumentBuilderFactory() { 195 return DocumentBuilderHardener.harden(DocumentBuilderFactory.newInstance()); 196 } 197 198 /** 199 * Returns a fresh, hardened {@link SAXParserFactory}. 200 * 201 * <p>Beyond the three universal guarantees on {@link XmlFactories}, XInclude resolution is disabled. Calling 202 * {@link SAXParserFactory#setXIncludeAware(boolean) setXIncludeAware(true)} on the returned factory does not re-enable resolution; a parse that encounters 203 * an {@code xi:include} element fails.</p> 204 * 205 * @return a hardened factory. 206 * @throws IllegalStateException if the underlying JAXP implementation is not recognized by any bundled hardening recipe, or if the matching recipe cannot 207 * apply its settings to it. 208 */ 209 public static SAXParserFactory newSAXParserFactory() { 210 return dispatch(SAXParserFactory.newInstance()); 211 } 212 213 /** 214 * Returns a fresh, hardened {@link SchemaFactory} configured for W3C XML Schema ({@link XMLConstants#W3C_XML_SCHEMA_NS_URI}). 215 * 216 * <p>Beyond the three universal guarantees on {@link XmlFactories}:</p> 217 * 218 * <ul> 219 * <li>{@code xs:import}, {@code xs:include} and {@code xs:redefine} schemaLocation URIs are not resolved during schema compilation, and</li> 220 * <li>{@code xsi:schemaLocation} / {@code xsi:noNamespaceSchemaLocation} hints in instance documents are not resolved during validation.</li> 221 * </ul> 222 * 223 * <p>The same guarantees apply to {@link javax.xml.validation.Validator} and {@link javax.xml.validation.ValidatorHandler} instances produced from the 224 * resulting {@link javax.xml.validation.Schema}.</p> 225 * 226 * @return a hardened factory. 227 * @throws IllegalStateException if the underlying Schema implementation is not recognized by any bundled hardening recipe, or if the matching recipe 228 * cannot apply its settings to it. 229 */ 230 public static SchemaFactory newSchemaFactory() { 231 return dispatch(SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI)); 232 } 233 234 /** 235 * Returns a fresh, hardened {@link TransformerFactory}. 236 * 237 * <p>Beyond the three universal guarantees on {@link XmlFactories}: {@code xsl:import}, {@code xsl:include} and {@code document()} URIs are not 238 * resolved.</p> 239 * 240 * <p>The guarantees apply to every parser the factory creates internally, both for stylesheet compilation and for source-document reading at 241 * {@code Transformer.transform(Source, Result)} time.</p> 242 * 243 * @return a hardened factory. 244 * @throws IllegalStateException if the underlying TrAX implementation is not recognized by any bundled hardening recipe, or if the matching recipe cannot 245 * apply its settings to it. 246 */ 247 public static TransformerFactory newTransformerFactory() { 248 return dispatch(TransformerFactory.newInstance()); 249 } 250 251 /** 252 * Returns a fresh, hardened {@link XMLInputFactory}. 253 * 254 * <p>The three universal guarantees on {@link XmlFactories} apply; StAX exposes no additional vectors beyond them.</p> 255 * 256 * @return a hardened factory. 257 * @throws IllegalStateException if the underlying StAX implementation is not recognized by any bundled hardening recipe, or if the matching recipe cannot 258 * apply its settings to it. 259 */ 260 public static XMLInputFactory newXMLInputFactory() { 261 return dispatch(XMLInputFactory.newInstance()); 262 } 263 264 /** 265 * Returns a fresh, hardened {@link XPathFactory} for the default XPath object model. 266 * 267 * <p>Beyond the three universal guarantees on {@link XmlFactories}, URI-fetching XPath 3.1+ functions ({@code doc()}, {@code collection()}, 268 * {@code unparsed-text()}) are not resolved.</p> 269 * 270 * @return a hardened factory. 271 * @throws IllegalStateException if the underlying XPath implementation is not recognized by any bundled hardening recipe, or if the matching recipe cannot 272 * apply its settings to it. 273 */ 274 public static XPathFactory newXPathFactory() { 275 return dispatch(XPathFactory.newInstance()); 276 } 277 278 private static HardeningException noProvider(final Object factory) { 279 return new HardeningException("No hardening recipe for JAXP factory class " + factory.getClass().getName()); 280 } 281 282 private XmlFactories() { 283 // static only 284 } 285}