ZipEncodingHelper.java

  1. /*
  2.  *  Licensed to the Apache Software Foundation (ASF) under one or more
  3.  *  contributor license agreements.  See the NOTICE file distributed with
  4.  *  this work for additional information regarding copyright ownership.
  5.  *  The ASF licenses this file to You under the Apache License, Version 2.0
  6.  *  (the "License"); you may not use this file except in compliance with
  7.  *  the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  *  Unless required by applicable law or agreed to in writing, software
  12.  *  distributed under the License is distributed on an "AS IS" BASIS,
  13.  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  *  See the License for the specific language governing permissions and
  15.  *  limitations under the License.
  16.  */

  17. package org.apache.commons.compress.archivers.zip;

import static java.nio.charset.StandardCharsets.UTF_8;

import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;

import org.apache.commons.io.Charsets;

  23. /**
  24.  * Static helper functions for robustly encoding file names in ZIP files.
  25.  */
  26. public abstract class ZipEncodingHelper {

  27.     /**
  28.      * UTF-8.
  29.      */
  30.     static final ZipEncoding ZIP_ENCODING_UTF_8 = getZipEncoding(UTF_8);

  31.     /**
  32.      * Instantiates a ZIP encoding. An NIO based character set encoder/decoder will be returned. As a special case, if the character set is UTF-8, the NIO
  33.      * encoder will be configured replace malformed and unmappable characters with '?'. This matches existing behavior from the older fallback encoder.
  34.      * <p>
  35.      * If the requested character set cannot be found, the platform default will be used instead.
  36.      * </p>
  37.      *
  38.      * @param charset The charset of the ZIP encoding. Specify {@code null} for the platform's default encoding.
  39.      * @return A ZIP encoding for the given encoding name.
  40.      * @since 1.26.0
  41.      */
  42.     public static ZipEncoding getZipEncoding(final Charset charset) {
  43.         return new NioZipEncoding(Charsets.toCharset(charset), isUTF8(Charsets.toCharset(charset)));
  44.     }

  45.     /**
  46.      * Instantiates a ZIP encoding. An NIO based character set encoder/decoder will be returned. As a special case, if the character set is UTF-8, the NIO
  47.      * encoder will be configured replace malformed and unmappable characters with '?'. This matches existing behavior from the older fallback encoder.
  48.      * <p>
  49.      * If the requested character set cannot be found, the platform default will be used instead.
  50.      * </p>
  51.      *
  52.      * @param name The name of the ZIP encoding. Specify {@code null} for the platform's default encoding.
  53.      * @return A ZIP encoding for the given encoding name.
  54.      */
  55.     public static ZipEncoding getZipEncoding(final String name) {
  56.         return new NioZipEncoding(toSafeCharset(name), isUTF8(toSafeCharset(name).name()));
  57.     }

  58.     static ByteBuffer growBufferBy(final ByteBuffer buffer, final int increment) {
  59.         buffer.limit(buffer.position());
  60.         buffer.rewind();
  61.         final ByteBuffer on = ByteBuffer.allocate(buffer.capacity() + increment);
  62.         on.put(buffer);
  63.         return on;
  64.     }

  65.     /**
  66.      * Tests whether a given encoding is UTF-8. If the given name is null, then check the platform's default encoding.
  67.      *
  68.      * @param charset If the given charset is null, then check the platform's default encoding.
  69.      */
  70.     static boolean isUTF8(final Charset charset) {
  71.         return isUTF8Alias(Charsets.toCharset(charset).name());
  72.     }

  73.     /**
  74.      * Tests whether a given encoding is UTF-8. If the given name is null, then check the platform's default encoding.
  75.      *
  76.      * @param charsetName If the given name is null, then check the platform's default encoding.
  77.      */
  78.     static boolean isUTF8(final String charsetName) {
  79.         return isUTF8Alias(charsetName != null ? charsetName : Charset.defaultCharset().name());
  80.     }

  81.     private static boolean isUTF8Alias(final String actual) {
  82.         return UTF_8.name().equalsIgnoreCase(actual) || UTF_8.aliases().stream().anyMatch(alias -> alias.equalsIgnoreCase(actual));
  83.     }

  84.     private static Charset toSafeCharset(final String name) {
  85.         Charset charset = Charset.defaultCharset();
  86.         try {
  87.             charset = Charsets.toCharset(name);
  88.         } catch (final UnsupportedCharsetException ignored) {
  89.             // Use the default encoding instead.
  90.         }
  91.         return charset;
  92.     }
  93. }