XmlFactories.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.xml;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.stream.XMLInputFactory;
import javax.xml.transform.Source;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.SchemaFactory;
import javax.xml.xpath.XPathFactory;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
/**
 * Entry point for obtaining hardened JAXP factories.
 *
 * <p>Every method on this class returns a <em>fresh, hardened</em> factory instance. No caching or pooling is performed; callers on a hot path are responsible
 * for their own caching.</p>
 *
 * <h2>Hardening guarantees</h2>
 *
 * <p>Every factory returned by this class makes the same three guarantees, regardless of which JAXP implementation is on the classpath:</p>
 *
 * <ul>
 * <li><strong>External DTDs are not fetched.</strong></li>
 * <li><strong>External entities are not resolved.</strong></li>
 * <li><strong>Internal entity expansion is bounded</strong> by the JDK's default limit, so DoS payloads such as Billion Laughs are rejected before they
 * exhaust resources.</li>
 * </ul>
 *
 * <p>The guarantees hold whether or not the caller opts into DTD validation
 * ({@link javax.xml.parsers.DocumentBuilderFactory#setValidating(boolean) setValidating(true)}) or attaches a compiled XSD via
 * {@link javax.xml.parsers.DocumentBuilderFactory#setSchema(javax.xml.validation.Schema) setSchema}: every external resource the validation would otherwise
 * fetch (the DTD itself, an {@code xsi:schemaLocation} hint, an external entity referenced from the DTD) remains blocked.</p>
 *
 * <p>Each method on this class adds factory-specific guarantees on top of the three above, documented on the corresponding {@code newXxxFactory()} method.</p>
 *
 * <h2>Caller-supplied URIs</h2>
 *
 * <p>A top-level URI passed directly by the caller is fetched as-is: {@code StreamSource(systemId)}, {@code DocumentBuilder.parse(String)}, or a
 * {@code SAXSource} built from a system id all cause the JAXP implementation to open that URI without consulting the hardening layer. Use a
 * {@link javax.xml.transform.URIResolver} or {@link org.xml.sax.EntityResolver} if you need to restrict the top-level fetch.</p>
 *
 * <h2>Thread safety</h2>
 *
 * <p>The returned factories inherit the thread-safety properties of the underlying JAXP implementation, which in practice means they are <strong>not
 * guaranteed to be thread-safe</strong>. Create a new factory per thread or synchronise externally.</p>
 *
 * <p>This class itself is thread-safe: all methods are static and stateless.</p>
 */
public final class XmlFactories {

    /**
     * Routes a {@link SAXParserFactory} to the hardening recipe registered for its concrete class name.
     *
     * @param factory the factory to configure.
     * @return the value returned by the matching provider's {@code configure} method.
     */
    private static SAXParserFactory dispatch(final SAXParserFactory factory) {
        switch (factory.getClass().getName()) {
        case "com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl":
            return StockJdkProvider.configure(factory);
        case "org.apache.harmony.xml.parsers.SAXParserFactoryImpl":
            return AndroidProvider.configure(factory);
        case "org.apache.xerces.jaxp.SAXParserFactoryImpl":
            return XercesProvider.configure(factory);
        default:
            throw noProvider(factory);
        }
    }

    /**
     * Routes an {@link XMLInputFactory} to the hardening recipe registered for its concrete class name.
     *
     * @param factory the factory to configure.
     * @return the value returned by the matching provider's {@code configure} method.
     */
    private static XMLInputFactory dispatch(final XMLInputFactory factory) {
        switch (factory.getClass().getName()) {
        case "com.sun.xml.internal.stream.XMLInputFactoryImpl":
            return StockJdkProvider.configure(factory);
        case "com.ctc.wstx.stax.WstxInputFactory":
            return WoodstoxProvider.configure(factory);
        default:
            throw noProvider(factory);
        }
    }

    /**
     * Routes a {@link TransformerFactory} to the hardening recipe registered for its concrete class name.
     *
     * @param factory the factory to configure.
     * @return the value returned by the matching provider's {@code configure} method.
     */
    private static TransformerFactory dispatch(final TransformerFactory factory) {
        switch (factory.getClass().getName()) {
        case "com.sun.org.apache.xalan.internal.xsltc.trax.TransformerFactoryImpl":
            return StockJdkProvider.configure(factory);
        case "org.apache.xalan.processor.TransformerFactoryImpl":
        case "org.apache.xalan.xsltc.trax.TransformerFactoryImpl":
            return XalanProvider.configure(factory);
        case "net.sf.saxon.TransformerFactoryImpl":
        case "com.saxonica.config.ProfessionalTransformerFactory":
        case "com.saxonica.config.EnterpriseTransformerFactory":
            return SaxonProvider.configure(factory);
        default:
            throw noProvider(factory);
        }
    }

    /**
     * Routes an {@link XPathFactory} to the hardening recipe registered for its concrete class name.
     *
     * @param factory the factory to configure.
     * @return the value returned by the matching provider's {@code configure} method.
     */
    private static XPathFactory dispatch(final XPathFactory factory) {
        switch (factory.getClass().getName()) {
        case "com.sun.org.apache.xpath.internal.jaxp.XPathFactoryImpl":
            return StockJdkProvider.configure(factory);
        case "org.apache.xpath.jaxp.XPathFactoryImpl":
            return XalanProvider.configure(factory);
        case "net.sf.saxon.xpath.XPathFactoryImpl":
            return SaxonProvider.configure(factory);
        default:
            throw noProvider(factory);
        }
    }

    /**
     * Routes a {@link SchemaFactory} to the hardening recipe registered for its concrete class name.
     *
     * @param factory the factory to configure.
     * @return the value returned by the matching provider's {@code configure} method.
     */
    private static SchemaFactory dispatch(final SchemaFactory factory) {
        switch (factory.getClass().getName()) {
        case "com.sun.org.apache.xerces.internal.jaxp.validation.XMLSchemaFactory":
            return StockJdkProvider.configure(factory);
        case "org.apache.xerces.jaxp.validation.XMLSchemaFactory":
            return XercesProvider.configure(factory);
        default:
            throw noProvider(factory);
        }
    }

    /**
     * Rewrites a {@link Source} so that any SAX parsing it triggers runs through an {@link XmlFactories}-hardened {@link XMLReader}.
     *
     * <p>Only {@link StreamSource} and {@link SAXSource} without a reader are enriched with a hardened reader. Other kinds of sources are returned as-is,
     * and so is a source for which {@link SAXSource#sourceToInputSource(Source)} yields no {@link InputSource}; in that case no parser is created.</p>
     *
     * @param source the source to harden; never {@code null}.
     * @return a hardened source.
     * @throws TransformerConfigurationException if a hardened reader cannot be obtained.
     */
    public static Source harden(final Source source) throws TransformerConfigurationException {
        final boolean lacksReader = source instanceof StreamSource
                || (source instanceof SAXSource && ((SAXSource) source).getXMLReader() == null);
        if (!lacksReader) {
            return source;
        }
        // Resolve the input before building a parser: when there is nothing to wrap the source is handed back untouched, so creating (and possibly failing
        // to create) a hardened reader would be wasted work.
        final InputSource inputSource = SAXSource.sourceToInputSource(source);
        if (inputSource == null) {
            return source;
        }
        try {
            final XMLReader reader = newSAXParserFactory().newSAXParser().getXMLReader();
            return new SAXSource(reader, inputSource);
        } catch (final ParserConfigurationException | SAXException e) {
            throw new TransformerConfigurationException("Failed to obtain a hardened XMLReader for source parsing", e);
        }
    }

    /**
     * Hardens an existing {@link XMLReader}.
     *
     * @param reader the reader to harden; never {@code null}.
     * @return a hardened reader.
     * @throws IllegalStateException if the reader's concrete class is not recognized by any bundled hardening recipe, or if the matching recipe cannot apply
     *         its settings to it.
     */
    public static XMLReader harden(final XMLReader reader) {
        switch (reader.getClass().getName()) {
        case "com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser":
            return StockJdkProvider.configure(reader);
        case "org.apache.harmony.xml.ExpatReader":
        case "org.apache.commons.xml.AndroidProvider$GuardedXMLReader":
            return AndroidProvider.configure(reader);
        case "org.apache.xerces.jaxp.SAXParserImpl$JAXPSAXParser":
            return XercesProvider.configure(reader);
        default:
            throw noProvider(reader);
        }
    }

    /**
     * Returns a fresh, hardened {@link DocumentBuilderFactory}.
     *
     * <p><strong>Enabling XInclude:</strong> {@link DocumentBuilderFactory#setXIncludeAware(boolean) setXIncludeAware(true)} on its own does not make XInclude
     * usable, because an included resource is fetched like any other external resource and is therefore blocked, failing the parse. A caller that genuinely
     * wants XInclude must, in addition to enabling awareness, install a custom {@link org.xml.sax.EntityResolver} that permits those specific lookups.</p>
     *
     * @return a hardened factory.
     * @throws IllegalStateException if a required hardening setting cannot be applied to the underlying implementation.
     */
    public static DocumentBuilderFactory newDocumentBuilderFactory() {
        return DocumentBuilderHardener.harden(DocumentBuilderFactory.newInstance());
    }

    /**
     * Returns a fresh, hardened {@link SAXParserFactory}.
     *
     * <p>Beyond the three universal guarantees on {@link XmlFactories}, XInclude resolution is disabled. Calling
     * {@link SAXParserFactory#setXIncludeAware(boolean) setXIncludeAware(true)} on the returned factory does not re-enable resolution; a parse that encounters
     * an {@code xi:include} element fails.</p>
     *
     * @return a hardened factory.
     * @throws IllegalStateException if the underlying JAXP implementation is not recognized by any bundled hardening recipe, or if the matching recipe cannot
     *         apply its settings to it.
     */
    public static SAXParserFactory newSAXParserFactory() {
        return dispatch(SAXParserFactory.newInstance());
    }

    /**
     * Returns a fresh, hardened {@link SchemaFactory} configured for W3C XML Schema ({@link XMLConstants#W3C_XML_SCHEMA_NS_URI}).
     *
     * <p>Beyond the three universal guarantees on {@link XmlFactories}:</p>
     *
     * <ul>
     * <li>{@code xs:import}, {@code xs:include} and {@code xs:redefine} schemaLocation URIs are not resolved during schema compilation, and</li>
     * <li>{@code xsi:schemaLocation} / {@code xsi:noNamespaceSchemaLocation} hints in instance documents are not resolved during validation.</li>
     * </ul>
     *
     * <p>The same guarantees apply to {@link javax.xml.validation.Validator} and {@link javax.xml.validation.ValidatorHandler} instances produced from the
     * resulting {@link javax.xml.validation.Schema}.</p>
     *
     * @return a hardened factory.
     * @throws IllegalStateException if the underlying Schema implementation is not recognized by any bundled hardening recipe, or if the matching recipe
     *         cannot apply its settings to it.
     */
    public static SchemaFactory newSchemaFactory() {
        return dispatch(SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI));
    }

    /**
     * Returns a fresh, hardened {@link TransformerFactory}.
     *
     * <p>Beyond the three universal guarantees on {@link XmlFactories}: {@code xsl:import}, {@code xsl:include} and {@code document()} URIs are not
     * resolved.</p>
     *
     * <p>The guarantees apply to every parser the factory creates internally, both for stylesheet compilation and for source-document reading at
     * {@code Transformer.transform(Source, Result)} time.</p>
     *
     * @return a hardened factory.
     * @throws IllegalStateException if the underlying TrAX implementation is not recognized by any bundled hardening recipe, or if the matching recipe cannot
     *         apply its settings to it.
     */
    public static TransformerFactory newTransformerFactory() {
        return dispatch(TransformerFactory.newInstance());
    }

    /**
     * Returns a fresh, hardened {@link XMLInputFactory}.
     *
     * <p>The three universal guarantees on {@link XmlFactories} apply; StAX exposes no additional vectors beyond them.</p>
     *
     * @return a hardened factory.
     * @throws IllegalStateException if the underlying StAX implementation is not recognized by any bundled hardening recipe, or if the matching recipe cannot
     *         apply its settings to it.
     */
    public static XMLInputFactory newXMLInputFactory() {
        return dispatch(XMLInputFactory.newInstance());
    }

    /**
     * Returns a fresh, hardened {@link XPathFactory} for the default XPath object model.
     *
     * <p>Beyond the three universal guarantees on {@link XmlFactories}, URI-fetching XPath 3.1+ functions ({@code doc()}, {@code collection()},
     * {@code unparsed-text()}) are not resolved.</p>
     *
     * @return a hardened factory.
     * @throws IllegalStateException if the underlying XPath implementation is not recognized by any bundled hardening recipe, or if the matching recipe cannot
     *         apply its settings to it.
     */
    public static XPathFactory newXPathFactory() {
        return dispatch(XPathFactory.newInstance());
    }

    /**
     * Builds the exception thrown when no bundled recipe matches the concrete class of the given object.
     *
     * @param factory the factory (or reader) whose class name is reported in the message.
     * @return the exception for the caller to throw.
     */
    private static HardeningException noProvider(final Object factory) {
        return new HardeningException("No hardening recipe for JAXP factory class " + factory.getClass().getName());
    }

    private XmlFactories() {
        // static only
    }
}