View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   https://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  
20  package org.apache.commons.compress.archivers.zip;
21  
22  import static java.nio.charset.StandardCharsets.UTF_8;
23  
24  import java.nio.ByteBuffer;
25  import java.nio.charset.Charset;
26  
27  import org.apache.commons.io.Charsets;
28  
29  /**
30   * Static helper functions for robustly encoding file names in ZIP files.
31   */
32  public abstract class ZipEncodingHelper {
33  
34      /**
35       * UTF-8.
36       */
37      static final ZipEncoding ZIP_ENCODING_UTF_8 = getZipEncoding(UTF_8);
38  
39      /**
40       * Instantiates a ZIP encoding. An NIO based character set encoder/decoder will be returned. As a special case, if the character set is UTF-8, the NIO
41       * encoder will be configured replace malformed and unmappable characters with '?'. This matches existing behavior from the older fallback encoder.
42       * <p>
43       * If the requested character set cannot be found, the platform default will be used instead.
44       * </p>
45       *
46       * @param charset The charset of the ZIP encoding. Specify {@code null} for the platform's default encoding.
47       * @return A ZIP encoding for the given encoding name.
48       * @since 1.26.0
49       */
50      public static ZipEncoding getZipEncoding(final Charset charset) {
51          return new NioZipEncoding(Charsets.toCharset(charset));
52      }
53  
54      /**
55       * Instantiates a ZIP encoding. An NIO based character set encoder/decoder will be returned. As a special case, if the character set is UTF-8, the NIO
56       * encoder will be configured replace malformed and unmappable characters with '?'. This matches existing behavior from the older fallback encoder.
57       * <p>
58       * If the requested character set cannot be found, the platform default will be used instead.
59       * </p>
60       *
61       * @param name The name of the ZIP encoding. Specify {@code null} for the platform's default encoding.
62       * @return A ZIP encoding for the given encoding name.
63       */
64      public static ZipEncoding getZipEncoding(final String name) {
65          return new NioZipEncoding(toSafeCharset(name));
66      }
67  
68      static ByteBuffer growBufferBy(final ByteBuffer buffer, final int increment) {
69          buffer.limit(buffer.position());
70          buffer.rewind();
71          final ByteBuffer on = ByteBuffer.allocate(buffer.capacity() + increment);
72          on.put(buffer);
73          return on;
74      }
75  
76      /**
77       * Tests whether a given encoding is UTF-8. If the given name is null, then check the platform's default encoding.
78       *
79       * @param charset If the given charset is null, then check the platform's default encoding.
80       */
81      static boolean isUTF8(final Charset charset) {
82          return isUTF8Alias(Charsets.toCharset(charset).name());
83      }
84  
85      /**
86       * Tests whether the given non-null charset name is a UTF-8 alias.
87       *
88       * @param name a non-null charset name.
89       * @return whether the given non-null charset name is a UTF-8 alias.
90       */
91      private static boolean isUTF8Alias(final String name) {
92          return UTF_8.name().equalsIgnoreCase(name) || UTF_8.aliases().stream().anyMatch(name::equalsIgnoreCase);
93      }
94  
95      /**
96       * Returns a Charset for the named charset. If the name cannot find a charset, return {@link Charset#defaultCharset()}.
97       *
98       * @param name The name of the requested charset, may be null.
99       * @return a Charset for the named charset.
100      * @see Charset#defaultCharset()
101      */
102     private static Charset toSafeCharset(final String name) {
103         try {
104             return Charsets.toCharset(name);
105         } catch (final IllegalArgumentException | NullPointerException ignored) {
106             return Charset.defaultCharset();
107         }
108     }
109 }