1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package org.apache.commons.compress.archivers.zip; 19 20 import static java.nio.charset.StandardCharsets.UTF_8; 21 22 import java.nio.ByteBuffer; 23 import java.nio.charset.Charset; 24 import java.nio.charset.StandardCharsets; 25 import java.nio.charset.UnsupportedCharsetException; 26 27 import org.apache.commons.io.Charsets; 28 29 /** 30 * Static helper functions for robustly encoding file names in ZIP files. 31 */ 32 public abstract class ZipEncodingHelper { 33 34 /** 35 * UTF-8. 36 */ 37 static final ZipEncoding ZIP_ENCODING_UTF_8 = getZipEncoding(StandardCharsets.UTF_8); 38 39 /** 40 * Instantiates a ZIP encoding. An NIO based character set encoder/decoder will be returned. As a special case, if the character set is UTF-8, the NIO 41 * encoder will be configured replace malformed and unmappable characters with '?'. This matches existing behavior from the older fallback encoder. 42 * <p> 43 * If the requested character set cannot be found, the platform default will be used instead. 44 * </p> 45 * 46 * @param charset The charset of the ZIP encoding. Specify {@code null} for the platform's default encoding. 47 * @return A ZIP encoding for the given encoding name. 48 * @since 1.26.0 49 */ 50 public static ZipEncoding getZipEncoding(final Charset charset) { 51 return new NioZipEncoding(Charsets.toCharset(charset), isUTF8(Charsets.toCharset(charset))); 52 } 53 54 /** 55 * Instantiates a ZIP encoding. An NIO based character set encoder/decoder will be returned. As a special case, if the character set is UTF-8, the NIO 56 * encoder will be configured replace malformed and unmappable characters with '?'. This matches existing behavior from the older fallback encoder. 57 * <p> 58 * If the requested character set cannot be found, the platform default will be used instead. 59 * </p> 60 * 61 * @param name The name of the ZIP encoding. Specify {@code null} for the platform's default encoding. 62 * @return A ZIP encoding for the given encoding name. 63 */ 64 public static ZipEncoding getZipEncoding(final String name) { 65 return new NioZipEncoding(toSafeCharset(name), isUTF8(toSafeCharset(name).name())); 66 } 67 68 static ByteBuffer growBufferBy(final ByteBuffer buffer, final int increment) { 69 buffer.limit(buffer.position()); 70 buffer.rewind(); 71 final ByteBuffer on = ByteBuffer.allocate(buffer.capacity() + increment); 72 on.put(buffer); 73 return on; 74 } 75 76 /** 77 * Tests whether a given encoding is UTF-8. If the given name is null, then check the platform's default encoding. 78 * 79 * @param charset If the given charset is null, then check the platform's default encoding. 80 */ 81 static boolean isUTF8(final Charset charset) { 82 return isUTF8Alias(Charsets.toCharset(charset).name()); 83 } 84 85 /** 86 * Tests whether a given encoding is UTF-8. If the given name is null, then check the platform's default encoding. 87 * 88 * @param charsetName If the given name is null, then check the platform's default encoding. 89 */ 90 static boolean isUTF8(final String charsetName) { 91 return isUTF8Alias(charsetName != null ? charsetName : Charset.defaultCharset().name()); 92 } 93 94 private static boolean isUTF8Alias(final String actual) { 95 return UTF_8.name().equalsIgnoreCase(actual) || UTF_8.aliases().stream().anyMatch(alias -> alias.equalsIgnoreCase(actual)); 96 } 97 98 private static Charset toSafeCharset(final String name) { 99 Charset charset = Charset.defaultCharset(); 100 try { 101 charset = Charsets.toCharset(name); 102 } catch (final UnsupportedCharsetException ignore) { // NOSONAR we use the default encoding instead 103 } 104 return charset; 105 } 106 }