001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.xml;
019
020import javax.xml.XMLConstants;
021import javax.xml.parsers.DocumentBuilderFactory;
022import javax.xml.parsers.ParserConfigurationException;
023import javax.xml.parsers.SAXParserFactory;
024import javax.xml.stream.XMLInputFactory;
025import javax.xml.transform.Source;
026import javax.xml.transform.TransformerConfigurationException;
027import javax.xml.transform.TransformerFactory;
028import javax.xml.transform.sax.SAXSource;
029import javax.xml.transform.stream.StreamSource;
030import javax.xml.validation.SchemaFactory;
031import javax.xml.xpath.XPathFactory;
032
033import org.xml.sax.InputSource;
034import org.xml.sax.SAXException;
035import org.xml.sax.XMLReader;
036
037/**
038 * Entry point for obtaining hardened JAXP factories.
039 *
040 * <p>Every method on this class returns a <em>fresh, hardened</em> factory instance. No caching or pooling is performed; callers on a hot path are responsible
041 * for their own caching.</p>
042 *
043 * <h2>Hardening guarantees</h2>
044 *
045 * <p>Every factory returned by this class makes the same three guarantees, regardless of which JAXP implementation is on the classpath:</p>
046 *
047 * <ul>
048 *   <li><strong>External DTDs are not fetched.</strong></li>
049 *   <li><strong>External entities are not resolved.</strong></li>
050 *   <li><strong>Internal entity expansion is bounded</strong> by the JDK's default limit, so DoS payloads such as Billion Laughs are rejected before they
051 *       exhaust resources.</li>
052 * </ul>
053 *
054 * <p>The guarantees hold whether or not the caller opts into DTD validation
055 * ({@link javax.xml.parsers.DocumentBuilderFactory#setValidating(boolean) setValidating(true)}) or attaches a compiled XSD via
056 * {@link javax.xml.parsers.DocumentBuilderFactory#setSchema(javax.xml.validation.Schema) setSchema}: every external resource the validation would otherwise
057 * fetch (the DTD itself, an {@code xsi:schemaLocation} hint, an external entity referenced from the DTD) remains blocked.</p>
058 *
059 * <p>Each method on this class adds factory-specific guarantees on top of the three above, documented on the corresponding {@code newXxxFactory()} method.</p>
060 *
061 * <h2>Caller-supplied URIs</h2>
062 *
063 * <p>A top-level URI passed directly by the caller is fetched as-is: {@code StreamSource(systemId)}, {@code DocumentBuilder.parse(String)}, or a
064 * {@code SAXSource} built from a system id all cause the JAXP implementation to open that URI without consulting the hardening layer. Use a
065 * {@link javax.xml.transform.URIResolver} or {@link org.xml.sax.EntityResolver} if you need to restrict the top-level fetch.</p>
066 *
067 * <h2>Thread safety</h2>
068 *
069 * <p>The returned factories inherit the thread-safety properties of the underlying JAXP implementation, which in practice means they are <strong>not
070 * guaranteed to be thread-safe</strong>. Create a new factory per thread or synchronise externally.</p>
071 *
072 * <p>This class itself is thread-safe: all methods are static and stateless.</p>
073 */
074public final class XmlFactories {
075
076    private static SAXParserFactory dispatch(final SAXParserFactory factory) {
077        switch (factory.getClass().getName()) {
078            case "com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl":
079                return StockJdkProvider.configure(factory);
080            case "org.apache.harmony.xml.parsers.SAXParserFactoryImpl":
081                return AndroidProvider.configure(factory);
082            case "org.apache.xerces.jaxp.SAXParserFactoryImpl":
083                return XercesProvider.configure(factory);
084            default:
085                throw noProvider(factory);
086        }
087    }
088
089    private static XMLInputFactory dispatch(final XMLInputFactory factory) {
090        switch (factory.getClass().getName()) {
091            case "com.sun.xml.internal.stream.XMLInputFactoryImpl":
092                return StockJdkProvider.configure(factory);
093            case "com.ctc.wstx.stax.WstxInputFactory":
094                return WoodstoxProvider.configure(factory);
095            default:
096                throw noProvider(factory);
097        }
098    }
099
100    private static TransformerFactory dispatch(final TransformerFactory factory) {
101        switch (factory.getClass().getName()) {
102            case "com.sun.org.apache.xalan.internal.xsltc.trax.TransformerFactoryImpl":
103                return StockJdkProvider.configure(factory);
104            case "org.apache.xalan.processor.TransformerFactoryImpl":
105            case "org.apache.xalan.xsltc.trax.TransformerFactoryImpl":
106                return XalanProvider.configure(factory);
107            case "net.sf.saxon.TransformerFactoryImpl":
108            case "com.saxonica.config.ProfessionalTransformerFactory":
109            case "com.saxonica.config.EnterpriseTransformerFactory":
110                return SaxonProvider.configure(factory);
111            default:
112                throw noProvider(factory);
113        }
114    }
115
116    private static XPathFactory dispatch(final XPathFactory factory) {
117        switch (factory.getClass().getName()) {
118            case "com.sun.org.apache.xpath.internal.jaxp.XPathFactoryImpl":
119                return StockJdkProvider.configure(factory);
120            case "org.apache.xpath.jaxp.XPathFactoryImpl":
121                return XalanProvider.configure(factory);
122            case "net.sf.saxon.xpath.XPathFactoryImpl":
123                return SaxonProvider.configure(factory);
124            default:
125                throw noProvider(factory);
126        }
127    }
128
129    private static SchemaFactory dispatch(final SchemaFactory factory) {
130        switch (factory.getClass().getName()) {
131            case "com.sun.org.apache.xerces.internal.jaxp.validation.XMLSchemaFactory":
132                return StockJdkProvider.configure(factory);
133            case "org.apache.xerces.jaxp.validation.XMLSchemaFactory":
134                return XercesProvider.configure(factory);
135            default:
136                throw noProvider(factory);
137        }
138    }
139
140    /**
141     * Rewrites a {@link Source} so that any SAX parsing it triggers runs through an {@link XmlFactories}-hardened {@link XMLReader}.
142     *
143     * <p>Only {@link StreamSource} and {@link SAXSource} without a reader are enriched with a hardened reader. Other kinds of sources are returned as-is.</p>
144     *
145     * @param source the source to harden; never {@code null}.
146     * @return a hardened source.
147     * @throws TransformerConfigurationException if a hardened reader cannot be obtained.
148     */
149    public static Source harden(final Source source) throws TransformerConfigurationException {
150        if (source instanceof StreamSource || source instanceof SAXSource && ((SAXSource) source).getXMLReader() == null) {
151            try {
152                final XMLReader reader = newSAXParserFactory().newSAXParser().getXMLReader();
153                final InputSource inputSource = SAXSource.sourceToInputSource(source);
154                return inputSource == null ? source : new SAXSource(reader, inputSource);
155            } catch (final ParserConfigurationException | SAXException e) {
156                throw new TransformerConfigurationException("Failed to obtain a hardened XMLReader for source parsing", e);
157            }
158        }
159        return source;
160    }
161
162    /**
163     * Hardens an existing {@link XMLReader}.
164     *
165     * @param reader the reader to harden; never {@code null}.
166     * @return a hardened reader.
167     * @throws IllegalStateException if the reader's concrete class is not recognized by any bundled hardening recipe, or if the matching recipe cannot apply
168     *         its settings to it.
169     */
170    public static XMLReader harden(final XMLReader reader) {
171        switch (reader.getClass().getName()) {
172            case "com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser":
173                return StockJdkProvider.configure(reader);
174            case "org.apache.harmony.xml.ExpatReader":
175            case "org.apache.commons.xml.AndroidProvider$GuardedXMLReader":
176                return AndroidProvider.configure(reader);
177            case "org.apache.xerces.jaxp.SAXParserImpl$JAXPSAXParser":
178                return XercesProvider.configure(reader);
179            default:
180                throw noProvider(reader);
181        }
182    }
183
184    /**
185     * Returns a fresh, hardened {@link DocumentBuilderFactory}.
186     *
187     * <p><strong>Enabling XInclude:</strong> {@link DocumentBuilderFactory#setXIncludeAware(boolean) setXIncludeAware(true)} on its own does not make XInclude
188     * usable, because an included resource is fetched like any other external resource and is therefore blocked, failing the parse. A caller that genuinely
189     * wants XInclude must, in addition to enabling awareness, install a custom {@link org.xml.sax.EntityResolver} that permits those specific lookups.</p>
190     *
191     * @return a hardened factory.
192     * @throws IllegalStateException if a required hardening setting cannot be applied to the underlying implementation.
193     */
194    public static DocumentBuilderFactory newDocumentBuilderFactory() {
195        return DocumentBuilderHardener.harden(DocumentBuilderFactory.newInstance());
196    }
197
198    /**
199     * Returns a fresh, hardened {@link SAXParserFactory}.
200     *
201     * <p>Beyond the three universal guarantees on {@link XmlFactories}, XInclude resolution is disabled. Calling
202     * {@link SAXParserFactory#setXIncludeAware(boolean) setXIncludeAware(true)} on the returned factory does not re-enable resolution; a parse that encounters
203     * an {@code xi:include} element fails.</p>
204     *
205     * @return a hardened factory.
206     * @throws IllegalStateException if the underlying JAXP implementation is not recognized by any bundled hardening recipe, or if the matching recipe cannot
207     *         apply its settings to it.
208     */
209    public static SAXParserFactory newSAXParserFactory() {
210        return dispatch(SAXParserFactory.newInstance());
211    }
212
213    /**
214     * Returns a fresh, hardened {@link SchemaFactory} configured for W3C XML Schema ({@link XMLConstants#W3C_XML_SCHEMA_NS_URI}).
215     *
216     * <p>Beyond the three universal guarantees on {@link XmlFactories}:</p>
217     *
218     * <ul>
219     *   <li>{@code xs:import}, {@code xs:include} and {@code xs:redefine} schemaLocation URIs are not resolved during schema compilation, and</li>
220     *   <li>{@code xsi:schemaLocation} / {@code xsi:noNamespaceSchemaLocation} hints in instance documents are not resolved during validation.</li>
221     * </ul>
222     *
223     * <p>The same guarantees apply to {@link javax.xml.validation.Validator} and {@link javax.xml.validation.ValidatorHandler} instances produced from the
224     * resulting {@link javax.xml.validation.Schema}.</p>
225     *
226     * @return a hardened factory.
227     * @throws IllegalStateException if the underlying Schema implementation is not recognized by any bundled hardening recipe, or if the matching recipe
228     *         cannot apply its settings to it.
229     */
230    public static SchemaFactory newSchemaFactory() {
231        return dispatch(SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI));
232    }
233
234    /**
235     * Returns a fresh, hardened {@link TransformerFactory}.
236     *
237     * <p>Beyond the three universal guarantees on {@link XmlFactories}: {@code xsl:import}, {@code xsl:include} and {@code document()} URIs are not
238     * resolved.</p>
239     *
240     * <p>The guarantees apply to every parser the factory creates internally, both for stylesheet compilation and for source-document reading at
241     * {@code Transformer.transform(Source, Result)} time.</p>
242     *
243     * @return a hardened factory.
244     * @throws IllegalStateException if the underlying TrAX implementation is not recognized by any bundled hardening recipe, or if the matching recipe cannot
245     *         apply its settings to it.
246     */
247    public static TransformerFactory newTransformerFactory() {
248        return dispatch(TransformerFactory.newInstance());
249    }
250
251    /**
252     * Returns a fresh, hardened {@link XMLInputFactory}.
253     *
254     * <p>The three universal guarantees on {@link XmlFactories} apply; StAX exposes no additional vectors beyond them.</p>
255     *
256     * @return a hardened factory.
257     * @throws IllegalStateException if the underlying StAX implementation is not recognized by any bundled hardening recipe, or if the matching recipe cannot
258     *         apply its settings to it.
259     */
260    public static XMLInputFactory newXMLInputFactory() {
261        return dispatch(XMLInputFactory.newInstance());
262    }
263
264    /**
265     * Returns a fresh, hardened {@link XPathFactory} for the default XPath object model.
266     *
267     * <p>Beyond the three universal guarantees on {@link XmlFactories}, URI-fetching XPath 3.1+ functions ({@code doc()}, {@code collection()},
268     * {@code unparsed-text()}) are not resolved.</p>
269     *
270     * @return a hardened factory.
271     * @throws IllegalStateException if the underlying XPath implementation is not recognized by any bundled hardening recipe, or if the matching recipe cannot
272     *         apply its settings to it.
273     */
274    public static XPathFactory newXPathFactory() {
275        return dispatch(XPathFactory.newInstance());
276    }
277
278    private static HardeningException noProvider(final Object factory) {
279        return new HardeningException("No hardening recipe for JAXP factory class " + factory.getClass().getName());
280    }
281
282    private XmlFactories() {
283        // static only
284    }
285}