View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *     https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.configuration2.io;
19  
20  import java.net.MalformedURLException;
21  import java.net.URL;
22  import java.util.Collections;
23  import java.util.LinkedHashSet;
24  import java.util.Objects;
25  import java.util.Set;
26  import java.util.function.Function;
27  import java.util.regex.Pattern;
28  import java.util.stream.Collectors;
29  
30  import org.apache.commons.configuration2.ex.ConfigurationDeniedException;
31  import org.apache.commons.io.build.AbstractSupplier;
32  import org.apache.commons.lang3.StringUtils;
33  
34  /**
35   * Abstracts services for FileLocationStrategy implementations.
36   * <p>
37   * Note that some FileLocationStrategy implementation use URLs internally to encode file locations.
38   * </p>
39   * <p>
40   * As of version 2.15.0, by default, the only URL schemes allowed are {@code file} and {@code jar}. To override this default, you can either use the system
41   * property {@code org.apache.commons.configuration2.io.FileLocationStrategy.schemes} or build a subclass of {@link AbstractFileLocationStrategy}.
42   * </p>
43   * <strong>Using System Properties</strong>
44   * <p>
45   * The system property {@code org.apache.commons.configuration2.io.FileLocationStrategy.schemes} String value must be a comma-separated list of schemes,
46   * where the default is {@code "file,jar"}, and the complete list is {@code "file,http,https,jar"}.
47   * </p>
48   * <strong>Using a Builder</strong>
49   * <p>
50   * The root builder for {@link AbstractFileLocationStrategy} is {@link AbstractBuilder} where you define allowed schemes and hosts through its setter
51   * methods.
52   * </p>
53   * <p>
54   * For example, to programatically enable the shemes "file", "http", "https", and "jar" for all strategies, you write:
55   * </p>
56   * <pre>{@code
57   * final PropertiesConfiguration pc = new PropertiesConfiguration();
58   *      pc.setIncludeListener(PropertiesConfiguration.NOOP_INCLUDE_LISTENER);
59   *      final FileHandler handler = new FileHandler(pc);
60   *      final CombinedLocationStrategy.Builder builder = new CombinedLocationStrategy.Builder()
61   *              .setSchemes(new TreeSet<>(Arrays.asList("file", "http", "https", "jar")));
62   *      // @formatter:off
63   *      handler.setLocationStrategy(builder.setSubStrategies(Arrays.asList(
64   *              new ProvidedURLLocationStrategy(builder),
65   *              new FileSystemLocationStrategy(builder),
66   *              new AbsoluteNameLocationStrategy(builder),
67   *              new BasePathLocationStrategy(builder),
68   *              new HomeDirectoryLocationStrategy.Builder().setEvaluateBasePath(true).getUnchecked(),
69   *              new HomeDirectoryLocationStrategy.Builder().setEvaluateBasePath(false).getUnchecked(),
70   *              new ClasspathLocationStrategy(builder)))
71   *              .get());
72   *      // @formatter:on
73   *      handler.setBasePath(TEST_BASE_PATH);
74   *      handler.setFileName("include-load-url-host-unknown-exception.properties");
75   *      handler.load();
76   * }</pre>
77   *
78   *
79   * @since 2.15.0
80   * @see FileLocationStrategy
81   */
82  public abstract class AbstractFileLocationStrategy implements FileLocationStrategy {
83  
84      /**
85       * Builds new instances for subclasses.
86       * <p>
87       * As of version 2.15.0, by default, the only URL schemes allowed are {@code file} and {@code jar}. To override this default, you can either use the system
88       * property {@code org.apache.commons.configuration2.io.FileLocationStrategy.schemes} or build a subclass of {@link AbstractFileLocationStrategy}.
89       * </p>
90       * <strong>Using System Properties</strong>
91       * <p>
92       * The system property {@code org.apache.commons.configuration2.io.FileLocationStrategy.schemes} String value must be a comma-separated list of schemes,
93       * where the default is {@code "file,jar"}, and the complete list is {@code "file,http,https,jar"}.
94       * </p>
95       * <strong>Using a Builder</strong>
96       * <p>
97       * The root builder for {@link AbstractFileLocationStrategy} is {@link AbstractBuilder} where you define allowed schemes and hosts through its setter
98       * methods.
99       * </p>
100      * <p>
101      * See {@link AbstractFileLocationStrategy} learn how to grant an deny URL schemes and hosts.
102      * </p>
103      *
104      * @param <T> The type of {@link FileLocationStrategy} to build.
105      * @param <B> The builder type.
106      */
107     public abstract static class AbstractBuilder<T extends FileLocationStrategy, B extends AbstractBuilder<T, B>> extends AbstractSupplier<T, B> {
108 
109         /**
110          * Enabled URL-based hosts, empty means all are enabled. Host are case-insensitive.
111          */
112         private Set<Pattern> hosts = Collections.emptySet();
113         /**
114          * Enabled URL-based schemes, empty means all are enabled. Schemes are case-insensitive.
115          */
116         private Set<String> schemes = Collections.emptySet();
117 
118         /**
119          * Constructs a new instance for subclasses.
120          */
121         public AbstractBuilder() {
122             // empty
123         }
124 
125         Set<Pattern> getHosts() {
126             return hosts;
127         }
128 
129         Set<String> getSchemes() {
130             return schemes;
131         }
132 
133         /**
134          * Sets enabled URL-based hosts, empty means all are enabled. URL hosts are case-insensitive.
135          *
136          * @param hosts enabled URL-based hosts.
137          * @return {@code this} instance.
138          */
139         public B setHosts(final Set<Pattern> hosts) {
140             this.hosts = hosts != null ? hosts : Collections.emptySet();
141             return asThis();
142         }
143 
144         /**
145          * Sets enabled URL-based hosts, empty means all are enabled. URL hosts are case-insensitive.
146          *
147          * @param hosts Regular expressions enabled URL-based hosts.
148          * @return {@code this} instance.
149          */
150         public B setHostsRegEx(final Set<String> hosts) {
151             return setHosts(hosts.stream().map(e -> Pattern.compile(e, Pattern.CASE_INSENSITIVE)).collect(Collectors.toSet()));
152         }
153 
154         /**
155          * Sets enabled URL-based schemes, empty means all are enabled. URL schemes are case-insensitive.
156          *
157          * @param schemes enabled URL-based schemes, the default null means all schemes are allowed.
158          * @return {@code this} instance.
159          */
160         public B setSchemes(final Set<String> schemes) {
161             this.schemes = schemes != null ? schemes : Collections.emptySet();
162             return asThis();
163         }
164     }
165 
166     /**
167      * Builds new instances of T.
168      *
169      * @param <T> The type of {@link FileLocationStrategy} to build.
170      */
171     public static class StrategyBuilder<T extends FileLocationStrategy> extends AbstractBuilder<T, StrategyBuilder<T>> {
172 
173         /**
174          * Either set this or implement get().
175          */
176         private final Function<StrategyBuilder<T>, T> function;
177 
178         /**
179          * Constructs a new instance for subclasses.
180          *
181          * @param function Builds an instance of T.
182          */
183         public StrategyBuilder(final Function<StrategyBuilder<T>, T> function) {
184             this.function = Objects.requireNonNull(function, "function");
185         }
186 
187         @Override
188         public T get() {
189             return function.apply(asThis());
190         }
191     }
192 
193     /**
194      * Default schemes.
195      */
196     private static final String DEFAULT_SCHEMES = "file,jar";
197     /**
198      * The system property key {@code org.apache.commons.configuration2.io.FileLocationStrategy.schemes}.
199      * <p>
200      * If absent, defaults to {@code "file,jar"}.
201      * </p>
202      * <p>
203      * For complete functionality, use {@code "file,http,https,jar"}.
204      * </p>
205      */
206     private static final String KEY_SCHEMES = "org.apache.commons.configuration2.io.FileLocationStrategy.schemes";
207 
208     private static void checkHost(final String value, final Set<Pattern> validSet) {
209         final String lowerCase = StringUtils.toRootLowerCase(value);
210         if (!validSet.isEmpty() && StringUtils.isNotEmpty(lowerCase) && validSet.stream().noneMatch(p -> p.matcher(lowerCase).matches())) {
211             throw new ConfigurationDeniedException("URL host is not enabled: %s; must be one of %s", value, validSet);
212         }
213     }
214 
215     /**
216      * Checks if the scheme is allowed.
217      *
218      * @param value A URL scheme, never empty or {@code null}.
219      * @param validSet the scheme valid-set.
220      */
221     private static void checkScheme(final String value, final Set<String> validSet) {
222         if (!validSet.isEmpty() && !validSet.contains(StringUtils.toRootLowerCase(value))) {
223             throw new ConfigurationDeniedException("URL scheme \"%s\" is not enabled, must be one of %s, override defaults with the system property \"%s\", "
224                     + "complete set: \"file,http,https,jar\"", value, validSet, KEY_SCHEMES);
225         }
226     }
227 
228     /**
229      * Validates {@code url} against the scheme and host allow-lists.
230      *
231      * @param url           the URL to check.
232      * @param validSchemes  the scheme valid-set.
233      * @param validHosts    the host valid-set.
234      * @throws ConfigurationDeniedException if the URL or any embedded URL fails the check, or a {@code jar:} URL is malformed.
235      */
236     static void checkUrl(final URL url, final Set<String> validSchemes, final Set<Pattern> validHosts) {
237         final String scheme = url.getProtocol();
238         checkScheme(scheme, validSchemes);
239         if ("jar".equalsIgnoreCase(scheme)) {
240             try {
241                 // Follows the logic of JarURLConnection#parseSpecs without the cost of opening a connection.
242                 final String spec = url.getFile();
243                 final int sep = spec.lastIndexOf("!/");
244                 if (sep < 0) {
245                     throw new MalformedURLException("no !/ found in url spec:" + spec);
246                 }
247                 final URL inner = new URL(spec.substring(0, sep));
248                 checkUrl(inner, validSchemes, validHosts);
249             } catch (final MalformedURLException e) {
250                 throw new ConfigurationDeniedException(e, "Malformed 'jar:' URL: %s", url);
251             }
252         } else {
253             checkHost(url.getHost(), validHosts);
254         }
255     }
256 
257     private static Set<String> getSchemesProperty() {
258         final Set<String> set = new LinkedHashSet<>();
259         final String[] split = System.getProperty(KEY_SCHEMES, DEFAULT_SCHEMES).split(",");
260         Collections.addAll(set, split);
261         return set;
262     }
263 
264     /**
265      * Enabled URL-based hosts, empty means all are enabled. Host are case-insensitive.
266      */
267     private final Set<Pattern> hosts;
268     /**
269      * Enabled URL-based schemes, empty means all are enabled. Schemes are case-insensitive.
270      */
271     private final Set<String> schemes;
272 
273     /**
274      * Constructs a new instance where the enabled URL schemes are read the system property
275      * {@code "org.apache.commons.configuration2.io.FileLocationStrategy.schemes"}.
276      * <p>
277      * If absent, defaults to {@code "file,jar"}.
278      * </p>
279      * <p>
280      * For complete functionality, use {@code "file,http,https,jar"}.
281      * </p>
282      */
283     AbstractFileLocationStrategy() {
284         this(getSchemesProperty());
285     }
286 
287     AbstractFileLocationStrategy(final AbstractBuilder<?, ?> builder) {
288         Objects.requireNonNull(builder, "builder");
289         this.schemes = builder.schemes;
290         this.hosts = builder.hosts != null ? builder.hosts : Collections.emptySet();
291     }
292 
293     AbstractFileLocationStrategy(final Set<String> schemes) {
294         this.schemes = schemes;
295         this.hosts = Collections.emptySet();
296     }
297 
298     URL check(final URL url) {
299         if (url != null) {
300             checkUrl(url, schemes, hosts);
301         }
302         return url;
303     }
304 
305     /**
306      * Gets the enabled hosts.
307      *
308      * @return the enabled hosts.
309      */
310     Set<Pattern> getHosts() {
311         return hosts;
312     }
313 
314     /**
315      * Gets the enabled schemes.
316      *
317      * @return the enabled schemes.
318      */
319     Set<String> getSchemes() {
320         return schemes;
321     }
322 
323     @Override
324     public String toString() {
325         return getClass().getSimpleName() + " [schemes=" + schemes + ", hosts=" + hosts + "]";
326     }
327 }