001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * https://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019 020package org.apache.commons.compress.archivers.zip; 021 022import static java.nio.charset.StandardCharsets.UTF_8; 023 024import java.nio.ByteBuffer; 025import java.nio.charset.Charset; 026 027import org.apache.commons.io.Charsets; 028 029/** 030 * Static helper functions for robustly encoding file names in ZIP files. 031 */ 032public abstract class ZipEncodingHelper { 033 034 /** 035 * UTF-8. 036 */ 037 static final ZipEncoding ZIP_ENCODING_UTF_8 = getZipEncoding(UTF_8); 038 039 /** 040 * Instantiates a ZIP encoding. An NIO based character set encoder/decoder will be returned. As a special case, if the character set is UTF-8, the NIO 041 * encoder will be configured replace malformed and unmappable characters with '?'. This matches existing behavior from the older fallback encoder. 042 * <p> 043 * If the requested character set cannot be found, the platform default will be used instead. 044 * </p> 045 * 046 * @param charset The charset of the ZIP encoding. Specify {@code null} for the platform's default encoding. 047 * @return A ZIP encoding for the given encoding name. 048 * @since 1.26.0 049 */ 050 public static ZipEncoding getZipEncoding(final Charset charset) { 051 return new NioZipEncoding(Charsets.toCharset(charset)); 052 } 053 054 /** 055 * Instantiates a ZIP encoding. An NIO based character set encoder/decoder will be returned. As a special case, if the character set is UTF-8, the NIO 056 * encoder will be configured replace malformed and unmappable characters with '?'. This matches existing behavior from the older fallback encoder. 057 * <p> 058 * If the requested character set cannot be found, the platform default will be used instead. 059 * </p> 060 * 061 * @param name The name of the ZIP encoding. Specify {@code null} for the platform's default encoding. 062 * @return A ZIP encoding for the given encoding name. 063 */ 064 public static ZipEncoding getZipEncoding(final String name) { 065 return new NioZipEncoding(toSafeCharset(name)); 066 } 067 068 static ByteBuffer growBufferBy(final ByteBuffer buffer, final int increment) { 069 buffer.limit(buffer.position()); 070 buffer.rewind(); 071 final ByteBuffer on = ByteBuffer.allocate(buffer.capacity() + increment); 072 on.put(buffer); 073 return on; 074 } 075 076 /** 077 * Tests whether a given encoding is UTF-8. If the given name is null, then check the platform's default encoding. 078 * 079 * @param charset If the given charset is null, then check the platform's default encoding. 080 */ 081 static boolean isUTF8(final Charset charset) { 082 return isUTF8Alias(Charsets.toCharset(charset).name()); 083 } 084 085 /** 086 * Tests whether the given non-null charset name is a UTF-8 alias. 087 * 088 * @param name a non-null charset name. 089 * @return whether the given non-null charset name is a UTF-8 alias. 090 */ 091 private static boolean isUTF8Alias(final String name) { 092 return UTF_8.name().equalsIgnoreCase(name) || UTF_8.aliases().stream().anyMatch(name::equalsIgnoreCase); 093 } 094 095 /** 096 * Returns a Charset for the named charset. If the name cannot find a charset, return {@link Charset#defaultCharset()}. 097 * 098 * @param name The name of the requested charset, may be null. 099 * @return a Charset for the named charset. 100 * @see Charset#defaultCharset() 101 */ 102 private static Charset toSafeCharset(final String name) { 103 try { 104 return Charsets.toCharset(name); 105 } catch (final IllegalArgumentException | NullPointerException ignored) { 106 return Charset.defaultCharset(); 107 } 108 } 109}