View Javadoc
1   /*
2    *  Licensed to the Apache Software Foundation (ASF) under one or more
3    *  contributor license agreements.  See the NOTICE file distributed with
4    *  this work for additional information regarding copyright ownership.
5    *  The ASF licenses this file to You under the Apache License, Version 2.0
6    *  (the "License"); you may not use this file except in compliance with
7    *  the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   *  Unless required by applicable law or agreed to in writing, software
12   *  distributed under the License is distributed on an "AS IS" BASIS,
13   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   *  See the License for the specific language governing permissions and
15   *  limitations under the License.
16   */
17  
18  package org.apache.commons.compress.archivers.zip;
19  
import static java.nio.charset.StandardCharsets.UTF_8;

import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.StandardCharsets;
import java.nio.charset.UnsupportedCharsetException;

import org.apache.commons.io.Charsets;
28  
29  /**
30   * Static helper functions for robustly encoding file names in ZIP files.
31   */
32  public abstract class ZipEncodingHelper {
33  
34      /**
35       * UTF-8.
36       */
37      static final ZipEncoding ZIP_ENCODING_UTF_8 = getZipEncoding(StandardCharsets.UTF_8);
38  
39      /**
40       * Instantiates a ZIP encoding. An NIO based character set encoder/decoder will be returned. As a special case, if the character set is UTF-8, the NIO
41       * encoder will be configured replace malformed and unmappable characters with '?'. This matches existing behavior from the older fallback encoder.
42       * <p>
43       * If the requested character set cannot be found, the platform default will be used instead.
44       * </p>
45       *
46       * @param charset The charset of the ZIP encoding. Specify {@code null} for the platform's default encoding.
47       * @return A ZIP encoding for the given encoding name.
48       * @since 1.26.0
49       */
50      public static ZipEncoding getZipEncoding(final Charset charset) {
51          return new NioZipEncoding(Charsets.toCharset(charset), isUTF8(Charsets.toCharset(charset)));
52      }
53  
54      /**
55       * Instantiates a ZIP encoding. An NIO based character set encoder/decoder will be returned. As a special case, if the character set is UTF-8, the NIO
56       * encoder will be configured replace malformed and unmappable characters with '?'. This matches existing behavior from the older fallback encoder.
57       * <p>
58       * If the requested character set cannot be found, the platform default will be used instead.
59       * </p>
60       *
61       * @param name The name of the ZIP encoding. Specify {@code null} for the platform's default encoding.
62       * @return A ZIP encoding for the given encoding name.
63       */
64      public static ZipEncoding getZipEncoding(final String name) {
65          return new NioZipEncoding(toSafeCharset(name), isUTF8(toSafeCharset(name).name()));
66      }
67  
68      static ByteBuffer growBufferBy(final ByteBuffer buffer, final int increment) {
69          buffer.limit(buffer.position());
70          buffer.rewind();
71          final ByteBuffer on = ByteBuffer.allocate(buffer.capacity() + increment);
72          on.put(buffer);
73          return on;
74      }
75  
76      /**
77       * Tests whether a given encoding is UTF-8. If the given name is null, then check the platform's default encoding.
78       *
79       * @param charset If the given charset is null, then check the platform's default encoding.
80       */
81      static boolean isUTF8(final Charset charset) {
82          return isUTF8Alias(Charsets.toCharset(charset).name());
83      }
84  
85      /**
86       * Tests whether a given encoding is UTF-8. If the given name is null, then check the platform's default encoding.
87       *
88       * @param charsetName If the given name is null, then check the platform's default encoding.
89       */
90      static boolean isUTF8(final String charsetName) {
91          return isUTF8Alias(charsetName != null ? charsetName : Charset.defaultCharset().name());
92      }
93  
94      private static boolean isUTF8Alias(final String actual) {
95          return UTF_8.name().equalsIgnoreCase(actual) || UTF_8.aliases().stream().anyMatch(alias -> alias.equalsIgnoreCase(actual));
96      }
97  
98      private static Charset toSafeCharset(final String name) {
99          Charset charset = Charset.defaultCharset();
100         try {
101             charset = Charsets.toCharset(name);
102         } catch (final UnsupportedCharsetException ignore) { // NOSONAR we use the default encoding instead
103         }
104         return charset;
105     }
106 }