WoodstoxProvider.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.xml;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLResolver;
import javax.xml.stream.XMLStreamException;
/**
* Hardening recipe for the FasterXML Woodstox StAX implementation ({@code com.ctc.wstx:woodstox-core}).
*
* <p>Woodstox is a StAX-only library, so this class only handles {@link XMLInputFactory}.</p>
*
* <p>Hardening recipe used below:</p>
* <ul>
* <li><strong>{@link Limits#applyToWoodstox}</strong>: defense-in-depth. Woodstox already enforces its own caps (see {@code ReaderConfig.DEFAULT_*}), but
* they are looser than the JDK 25 secure values; this call aligns them so a Woodstox-backed factory enforces the same processing limits as the JDK
* parsers.</li>
* <li><strong>Three resolver hooks</strong>: required. Woodstox honours {@code IS_SUPPORTING_EXTERNAL_ENTITIES=true} and {@code SUPPORT_DTD=true} by
* default and routes lookups through three Woodstox-specific resolver properties; the hardened factory installs:
* <ul>
* <li>{@code com.ctc.wstx.dtdResolver} = {@link #DTD_SUBSET_ONLY}: returns an empty input for the external DTD subset so DOCTYPE-only
* documents parse silently, but throws on external parameter entities (which share this hook). The {@code entityName} 4th argument is
* the discriminator: {@code null} for the subset, the entity name for parameter entities (see {@code DefaultInputResolver} and
* {@code ValidatingStreamReader.findDtdExtSubset}).</li>
* <li>{@code com.ctc.wstx.entityResolver} = {@link Resolvers.DenyAll#XML}: throws on declared external general entities.</li>
* <li>{@code com.ctc.wstx.undeclaredEntityResolver} = {@link Resolvers.IgnoreAll#XML}: silently drops references to entities the parser has not
* seen declared, matching the SAX path's behaviour.</li>
* </ul>
* </li>
* </ul>
*/
final class WoodstoxProvider {

    /** Key of the Woodstox factory property holding the resolver used for the external DTD subset and for external parameter entities. */
    private static final String WSTX_DTD_RESOLVER = "com.ctc.wstx.dtdResolver";

    /** Key of the Woodstox factory property holding the resolver used for declared external general entities. */
    private static final String WSTX_ENTITY_RESOLVER = "com.ctc.wstx.entityResolver";

    /** Key of the Woodstox factory property holding the resolver used for references to undeclared entities. */
    private static final String WSTX_UNDECLARED_ENTITY_RESOLVER = "com.ctc.wstx.undeclaredEntityResolver";

    /**
     * Two-mode resolver for the Woodstox DTD hook: the external DTD subset is answered with whatever {@link Resolvers.IgnoreAll#XML} returns,
     * while any external parameter entity is rejected with an {@link XMLStreamException}.
     *
     * <p>The two cases are told apart by the 4th argument: Woodstox passes {@code null} when it asks for the subset and the entity name when it
     * expands a parameter entity. Because that convention is specific to Woodstox (the JDK Zephyr parser always hands {@code null} to its
     * {@code XMLResolver} in that position), this resolver is kept in this class instead of {@link Resolvers}.</p>
     */
    static final XMLResolver DTD_SUBSET_ONLY = WoodstoxProvider::resolveDtdLookup;

    /**
     * Hardens the given Woodstox-backed factory in place.
     *
     * <p>Processing limits are applied through {@link Limits#applyToWoodstox}, then the three Woodstox resolver properties are set:
     * {@link #DTD_SUBSET_ONLY} for the DTD hook, {@link Resolvers.DenyAll#XML} for declared external general entities and
     * {@link Resolvers.IgnoreAll#XML} for undeclared entity references.</p>
     *
     * @param factory the factory to configure
     * @return the same {@code factory} instance, after configuration
     */
    static XMLInputFactory configure(final XMLInputFactory factory) {
        // Defense-in-depth step: bring Woodstox's own (looser) caps in line with the JDK 25 secure values.
        Limits.applyToWoodstox(factory);

        // Mandatory: the external subset resolves to an empty input, external parameter entities are refused.
        factory.setProperty(WSTX_DTD_RESOLVER, DTD_SUBSET_ONLY);

        // Mandatory: declared external general entities are refused.
        factory.setProperty(WSTX_ENTITY_RESOLVER, Resolvers.DenyAll.XML);

        // Mandatory: undeclared entity references are dropped quietly, the same tolerance the SAX path shows.
        factory.setProperty(WSTX_UNDECLARED_ENTITY_RESOLVER, Resolvers.IgnoreAll.XML);

        return factory;
    }

    /**
     * Implementation behind {@link #DTD_SUBSET_ONLY}.
     *
     * @param publicID   public identifier of the requested resource, may be {@code null}
     * @param systemID   system identifier of the requested resource, may be {@code null}
     * @param baseURI    base URI the lookup is relative to, may be {@code null}
     * @param entityName {@code null} for the external DTD subset, otherwise the name of the parameter entity being expanded
     * @return the result of delegating to {@link Resolvers.IgnoreAll#XML} when the external subset is requested
     * @throws XMLStreamException if {@code entityName} is not {@code null}, i.e. an external parameter entity is requested
     */
    private static Object resolveDtdLookup(final String publicID, final String systemID, final String baseURI, final String entityName)
            throws XMLStreamException {
        // Subset lookup: nothing to refuse, hand the request to the ignore-everything resolver.
        if (entityName == null) {
            return Resolvers.IgnoreAll.XML.resolveEntity(publicID, systemID, baseURI, entityName);
        }
        // A named lookup through this hook is an external parameter entity: always refused.
        final String refusal = "External parameter entity '" + entityName + "' refused (publicID=" + publicID + ", systemID=" + systemID
                + ", baseURI=" + baseURI + ")";
        throw new XMLStreamException(refusal);
    }

    /** Not instantiable: the class only exposes static members. */
    private WoodstoxProvider() {
    }
}