View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.configuration2.io;
18  
19  import java.io.File;
20  import java.net.URL;
21  import java.nio.ByteBuffer;
22  import java.nio.charset.Charset;
23  import java.nio.charset.StandardCharsets;
24  
25  import org.apache.commons.lang3.Strings;
26  
/**
 * This class is a subset of org.apache.commons.io.FileUtils, git-svn-id:
 * https://svn.apache.org/repos/asf/commons/proper/io/trunk@1423916 13f79535-47bb-0310-9956-ffa450edef68. The subset is
 * determined by {@link FileLocatorUtils}. The constants and methods originate from that class;
 * {@link #decodeUrl(String)} deviates from the original by validating percent-encoded octets strictly (exactly two
 * ASCII hexadecimal digits) instead of relying on {@link Integer#parseInt(String, int)}.
 *
 * See CONFIGURATION-521 for a discussion.
 */
final class FileUtils {

    /**
     * The UTF-8 character set, used to decode octets in URLs.
     */
    private static final Charset UTF8 = StandardCharsets.UTF_8;

    /**
     * Decodes the specified URL as per RFC 3986, i.e. transforms percent-encoded octets to characters by decoding with the
     * UTF-8 character set. This function is primarily intended for usage with {@link java.net.URL} which unfortunately does
     * not enforce proper URLs. As such, this method will leniently accept invalid characters or malformed percent-encoded
     * octets and simply pass them literally through to the result string. Except for rare edge cases, this will make
     * unencoded URLs pass through unaltered.
     * <p>
     * A percent-encoded octet is only recognized if the {@code '%'} is followed by exactly two ASCII hexadecimal digits
     * ({@code 0-9}, {@code a-f}, {@code A-F}). Anything else, such as {@code "%-1"}, {@code "%+A"}, {@code "%zz"} or a
     * truncated {@code "%4"} at the end of the input, is copied to the result unchanged.
     * </p>
     *
     * @param url The URL to decode, may be {@code null}.
     * @return The decoded URL or {@code null} if the input was {@code null}.
     */
    static String decodeUrl(final String url) {
        if (url == null || url.indexOf('%') < 0) {
            // nothing to decode, hand the input back untouched
            return url;
        }
        final int n = url.length();
        final StringBuilder buffer = new StringBuilder(n);
        // every octet consumes three input characters, so n / 3 bytes always suffice
        final ByteBuffer bytes = ByteBuffer.allocate(n / 3);
        int i = 0;
        while (i < n) {
            // Gather a maximal run of consecutive well-formed %XX triplets so that multi-byte
            // UTF-8 sequences are decoded as a unit.
            while (i + 2 < n && url.charAt(i) == '%') {
                final int high = hexValue(url.charAt(i + 1));
                final int low = hexValue(url.charAt(i + 2));
                if (high < 0 || low < 0) {
                    // malformed octet: stop the run, the '%' is appended literally below
                    break;
                }
                bytes.put((byte) ((high << 4) | low));
                i += 3;
            }
            if (bytes.position() > 0) {
                bytes.flip();
                buffer.append(UTF8.decode(bytes));
                bytes.clear();
                // i already points behind the decoded run (possibly at the end of the input)
                continue;
            }
            buffer.append(url.charAt(i++));
        }
        return buffer.toString();
    }

    /**
     * Returns the numeric value of an ASCII hexadecimal digit.
     *
     * @param c the character to inspect
     * @return the value {@code 0..15}, or {@code -1} if {@code c} is not one of {@code 0-9}, {@code a-f}, {@code A-F}
     */
    private static int hexValue(final char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }

    /**
     * Convert from a {@code URL} to a {@code File}.
     * <p>
     * From version 1.1 this method will decode the URL. Syntax such as {@code file:///my%20docs/file.txt} will be correctly
     * decoded to {@code /my docs/file.txt}. Starting with version 1.5, this method uses UTF-8 to decode percent-encoded
     * octets to characters. Additionally, malformed percent-encoded octets are handled leniently by passing them through
     * literally.
     * </p>
     * <p>
     * The protocol is compared case-insensitively. Forward slashes in the URL's file part are replaced by
     * {@link File#separatorChar} before the percent-encoded octets are decoded.
     * </p>
     *
     * @param url the file URL to convert, {@code null} returns {@code null}
     * @return the equivalent {@code File} object, or {@code null} if the URL is {@code null} or its protocol is not
     *         {@code file}
     */
    public static File toFile(final URL url) {
        // equalsIgnoreCase(null) is false, so a missing protocol is rejected as well
        if (url == null || !"file".equalsIgnoreCase(url.getProtocol())) {
            return null;
        }
        final String fileName = url.getFile().replace('/', File.separatorChar);
        return new File(decodeUrl(fileName));
    }

}