001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *   https://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019
020package org.apache.commons.compress.archivers.zip;
021
022import static java.nio.charset.StandardCharsets.UTF_8;
023
024import java.nio.ByteBuffer;
025import java.nio.charset.Charset;
026
027import org.apache.commons.io.Charsets;
028
029/**
030 * Static helper functions for robustly encoding file names in ZIP files.
031 */
032public abstract class ZipEncodingHelper {
033
034    /**
035     * UTF-8.
036     */
037    static final ZipEncoding ZIP_ENCODING_UTF_8 = getZipEncoding(UTF_8);
038
039    /**
040     * Instantiates a ZIP encoding. An NIO based character set encoder/decoder will be returned. As a special case, if the character set is UTF-8, the NIO
041     * encoder will be configured replace malformed and unmappable characters with '?'. This matches existing behavior from the older fallback encoder.
042     * <p>
043     * If the requested character set cannot be found, the platform default will be used instead.
044     * </p>
045     *
046     * @param charset The charset of the ZIP encoding. Specify {@code null} for the platform's default encoding.
047     * @return A ZIP encoding for the given encoding name.
048     * @since 1.26.0
049     */
050    public static ZipEncoding getZipEncoding(final Charset charset) {
051        return new NioZipEncoding(Charsets.toCharset(charset));
052    }
053
054    /**
055     * Instantiates a ZIP encoding. An NIO based character set encoder/decoder will be returned. As a special case, if the character set is UTF-8, the NIO
056     * encoder will be configured replace malformed and unmappable characters with '?'. This matches existing behavior from the older fallback encoder.
057     * <p>
058     * If the requested character set cannot be found, the platform default will be used instead.
059     * </p>
060     *
061     * @param name The name of the ZIP encoding. Specify {@code null} for the platform's default encoding.
062     * @return A ZIP encoding for the given encoding name.
063     */
064    public static ZipEncoding getZipEncoding(final String name) {
065        return new NioZipEncoding(toSafeCharset(name));
066    }
067
068    static ByteBuffer growBufferBy(final ByteBuffer buffer, final int increment) {
069        buffer.limit(buffer.position());
070        buffer.rewind();
071        final ByteBuffer on = ByteBuffer.allocate(buffer.capacity() + increment);
072        on.put(buffer);
073        return on;
074    }
075
076    /**
077     * Tests whether a given encoding is UTF-8. If the given name is null, then check the platform's default encoding.
078     *
079     * @param charset If the given charset is null, then check the platform's default encoding.
080     */
081    static boolean isUTF8(final Charset charset) {
082        return isUTF8Alias(Charsets.toCharset(charset).name());
083    }
084
085    /**
086     * Tests whether the given non-null charset name is a UTF-8 alias.
087     *
088     * @param name a non-null charset name.
089     * @return whether the given non-null charset name is a UTF-8 alias.
090     */
091    private static boolean isUTF8Alias(final String name) {
092        return UTF_8.name().equalsIgnoreCase(name) || UTF_8.aliases().stream().anyMatch(name::equalsIgnoreCase);
093    }
094
095    /**
096     * Returns a Charset for the named charset. If the name cannot find a charset, return {@link Charset#defaultCharset()}.
097     *
098     * @param name The name of the requested charset, may be null.
099     * @return a Charset for the named charset.
100     * @see Charset#defaultCharset()
101     */
102    private static Charset toSafeCharset(final String name) {
103        try {
104            return Charsets.toCharset(name);
105        } catch (final IllegalArgumentException | NullPointerException ignored) {
106            return Charset.defaultCharset();
107        }
108    }
109}