XercesProvider.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.xml;

import static org.apache.commons.xml.JaxpSetters.setFeature;

import javax.xml.XMLConstants;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import javax.xml.validation.Validator;
import javax.xml.validation.ValidatorHandler;

import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.XMLReader;

/**
 * Hardening recipes for the external Apache Xerces distribution (the {@code xerces:xercesImpl} artifact).
 *
 * <p>Factory classes live in the {@code org.apache.xerces.*} package. External Xerces does not ship a {@code TransformerFactory}, {@code XMLInputFactory} or
 * {@code XPathFactory}, so this class only handles SAX and Schema factories. DOM hardening lives in {@link DocumentBuilderHardener}.</p>
 *
 * <p>The hardening recipe applied to every factory below uses the same building blocks:</p>
 * <ul>
 *     <li><strong>FSP</strong> ({@link XMLConstants#FEATURE_SECURE_PROCESSING}, set to {@code true}): enables Xerces' built-in {@code SecurityManager}, which
 *         is what carries the processing limits. Required.</li>
 *     <li><strong>{@link Limits#applyToXerces}</strong>: defense-in-depth. Xerces' {@code SecurityManager} ships its own caps, but they are looser than even
 *         JDK 8's secure values; this call pins them to the JDK 25 secure values (entity-expansion limit and {@code maxOccurs} node limit, the only two its
 *         API exposes setters for).</li>
 *     <li>
 *         <p><strong>{@code HardeningXxx} wrappers + {@link Resolvers.DenyAll}</strong>: required. Xerces does not implement the JAXP 1.5
 *         {@code ACCESS_EXTERNAL_*} properties, so an explicit resolver installed on every parser/validator is the best way to block external
 *         entity, DTD and schema fetching, without disabling those features altogether. The wrappers exist for two reasons:</p>
 *         <ol>
 *             <li>{@link SAXParserFactory} carries no resolver, so it has to be set on each {@link SAXParser} produced.</li>
 *             <li>Xerces' {@link Schema} does not propagate the {@link SchemaFactory}'s resolver or security manager to its
 *             {@link Validator} / {@link ValidatorHandler} products, so the wrapper re-installs both on every product.</li>
 *         </ol>
 *     </li>
 * </ul>
 */
final class XercesProvider {

    /**
     * Xerces-specific property whose value is an {@code org.apache.xerces.util.SecurityManager} instance carrying processing-limit thresholds.
     */
    static final String XERCES_SECURITY_MANAGER_PROPERTY = "http://apache.org/xml/properties/security-manager";

    /** Xerces feature that controls whether non-validating parsers load the external DTD subset. */
    private static final String XERCES_LOAD_EXTERNAL_DTD = "http://apache.org/xml/features/nonvalidating/load-external-dtd";

    /**
     * Minimal view of "something with a {@code getProperty(String)} method", so that a single helper can serve {@link XMLReader}, {@link SchemaFactory},
     * {@link Validator} and {@link ValidatorHandler}, which share no common supertype for that method.
     */
    @FunctionalInterface
    private interface PropertyReader {

        /**
         * Looks up a property by name.
         *
         * @param name the property name.
         * @return the property value.
         * @throws SAXNotRecognizedException if the underlying object does not recognize the property.
         * @throws SAXNotSupportedException  if the underlying object recognizes the property but cannot return it.
         */
        Object read(String name) throws SAXNotRecognizedException, SAXNotSupportedException;
    }

    /** Static utility class: not instantiable. */
    private XercesProvider() {
        // no instances
    }

    /**
     * Hardens a Xerces {@link SAXParserFactory}.
     *
     * <p>Only secure processing and namespace awareness are set on the factory itself; limits and the deny-all entity resolver are applied to each
     * {@link XMLReader} through {@link #configure(XMLReader)}, which the returned wrapper receives as a callback.</p>
     *
     * @param factory the factory to harden; it is modified in place.
     * @return a {@code HardeningSAXParserFactory} wrapping {@code factory}.
     */
    static SAXParserFactory configure(final SAXParserFactory factory) {
        // Required: secure processing is what makes Xerces install its built-in SecurityManager, the carrier of the limits.
        setFeature(factory, XMLConstants.FEATURE_SECURE_PROCESSING, true);
        // Useful: recognize namespaces by default.
        factory.setNamespaceAware(true);
        // SAXParserFactory has no property API, so limits and the entity resolver are applied per XMLReader instead.
        return new HardeningSAXParserFactory(factory, XercesProvider::configure);
    }

    /**
     * Hardens a Xerces {@link XMLReader} in place.
     *
     * @param reader the reader to harden.
     * @return the same {@code reader} instance.
     * @throws HardeningException if the Xerces security manager property cannot be read from the reader.
     */
    static XMLReader configure(final XMLReader reader) {
        // Required: turn on secure processing so that Xerces' SecurityManager is present, also on a raw reader (e.g. one picked by Saxon).
        setFeature(reader, XMLConstants.FEATURE_SECURE_PROCESSING, true);
        // Skip the external DTD subset on non-validating parsers: DOCTYPE-only documents still parse silently, without SSRF.
        setFeature(reader, XERCES_LOAD_EXTERNAL_DTD, false);
        // Defense-in-depth: tighten the SecurityManager Xerces already installed on the reader to the JDK 25 limits.
        tightenLimits(reader::getProperty, "Failed to read Xerces security manager from XMLReader");
        // Required: Xerces does not honour JAXP 1.5 ACCESS_EXTERNAL_*; the deny-all resolver is the only block.
        reader.setEntityResolver(Resolvers.DenyAll.ENTITY2);
        return reader;
    }

    /**
     * Hardens a Xerces {@link SchemaFactory}.
     *
     * @param factory the factory to harden; it is modified in place.
     * @return a {@code HardeningSchemaFactory} wrapping {@code factory}, which re-hardens every {@link Validator} and {@link ValidatorHandler} it produces.
     * @throws HardeningException if the Xerces security manager property cannot be read from the factory.
     */
    static SchemaFactory configure(final SchemaFactory factory) {
        // Required: enables Xerces' built-in SecurityManager.
        setFeature(factory, XMLConstants.FEATURE_SECURE_PROCESSING, true);
        // Required: pin the limits to the JDK 25 secure values; Xerces' own caps are looser than JDK 8's.
        tightenLimits(factory::getProperty, "Failed to read Xerces security manager from SchemaFactory");
        // Required: Xerces ignores ACCESS_EXTERNAL_*; the deny-all resolver blocks xs:import/include/redefine fetches.
        factory.setResourceResolver(Resolvers.DenyAll.LS_RESOURCE);
        // Required: the wrapper routes every newSchema(Source[]) parse through an XmlFactories-hardened reader and re-installs limits + resolver on each
        // Validator and ValidatorHandler, because Xerces' Schema does not propagate factory state to them.
        return new HardeningSchemaFactory(factory, XercesProvider::hardenValidator, XercesProvider::hardenValidatorHandler);
    }

    /**
     * Re-applies limits and the deny-all resource resolver to a {@link Validator} produced by a Xerces schema.
     *
     * @param validator the validator to harden in place.
     * @return the same {@code validator} instance.
     * @throws HardeningException if the Xerces security manager property cannot be read from the validator.
     */
    private static Validator hardenValidator(final Validator validator) {
        tightenLimits(validator::getProperty, "Failed to read Xerces security manager from Validator");
        validator.setResourceResolver(Resolvers.DenyAll.LS_RESOURCE);
        return validator;
    }

    /**
     * Re-applies limits and the deny-all resource resolver to a {@link ValidatorHandler} produced by a Xerces schema.
     *
     * @param handler the handler to harden in place.
     * @return the same {@code handler} instance.
     * @throws HardeningException if the Xerces security manager property cannot be read from the handler.
     */
    private static ValidatorHandler hardenValidatorHandler(final ValidatorHandler handler) {
        tightenLimits(handler::getProperty, "Failed to read Xerces security manager from ValidatorHandler");
        handler.setResourceResolver(Resolvers.DenyAll.LS_RESOURCE);
        return handler;
    }

    /**
     * Reads the value of {@link #XERCES_SECURITY_MANAGER_PROPERTY} through {@code properties} and hands it to {@link Limits#applyToXerces}.
     *
     * @param properties     property accessor of the object being hardened.
     * @param failureMessage message of the {@link HardeningException} thrown when the property lookup is rejected.
     * @throws HardeningException wrapping the {@link SAXNotRecognizedException} or {@link SAXNotSupportedException} raised inside the lookup-and-apply step.
     */
    private static void tightenLimits(final PropertyReader properties, final String failureMessage) {
        try {
            final Object securityManager = properties.read(XERCES_SECURITY_MANAGER_PROPERTY);
            Limits.applyToXerces(securityManager);
        } catch (final SAXNotRecognizedException | SAXNotSupportedException e) {
            throw new HardeningException(failureMessage, e);
        }
    }
}