/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.compressors;

import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;

/**
 * File name mapping code for the compression formats.
 * <p>
 * Instances are immutable: the suffix map handed to the constructor is copied, so later changes to the caller's map have no effect on an existing instance.
 * </p>
 *
 * @ThreadSafe
 * @since 1.4
 */
public class FileNameUtil {

    /**
     * Returns the last {@code length} characters of {@code fileName}, converted to lower case with {@link Locale#ROOT}.
     * <p>
     * Only the tail is lower-cased, never the whole name: lower-casing may change the number of characters (for example U+0130 becomes two characters), so an
     * offset computed on a lower-cased copy of the full name is not a valid offset into the original name.
     * </p>
     *
     * @param fileName name of a file
     * @param length   number of trailing characters to take, at most {@code fileName.length()}
     * @return the lower-cased tail
     */
    private static String lowerCaseTail(final String fileName, final int length) {
        return fileName.substring(fileName.length() - length).toLowerCase(Locale.ROOT);
    }

    /**
     * Finds the length of the shortest known suffix that {@code fileName} ends with, ignoring case.
     * <p>
     * Candidate lengths are tried in ascending order and must be strictly smaller than the length of the name, so a name that consists of nothing but a
     * suffix is never matched.
     * </p>
     *
     * @param fileName name of a file
     * @param suffixes the known suffixes (as lower-case keys)
     * @param shortest length of the shortest key worth trying
     * @param longest  length of the longest key worth trying
     * @return the length of the matching suffix, or {@code -1} if no known suffix matches
     */
    private static int matchingSuffixLength(final String fileName, final Map<String, String> suffixes, final int shortest, final int longest) {
        final int nameLength = fileName.length();
        for (int i = shortest; i <= longest && i < nameLength; i++) {
            if (suffixes.containsKey(lowerCaseTail(fileName, i))) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Map from common file name suffixes to the suffixes that identify compressed versions of those file types. For example: from ".tar" to ".tgz".
     */
    private final Map<String, String> compressSuffix;

    /**
     * Map from common file name suffixes of compressed files to the corresponding suffixes of uncompressed files. For example: from ".tgz" to ".tar".
     * <p>
     * This map also contains format-specific suffixes like ".gz" and "-z". These suffixes are mapped to the empty string, as they should simply be removed from
     * the file name when the file is uncompressed.
     * </p>
     */
    private final Map<String, String> uncompressSuffix;

    /**
     * Length of the longest compressed suffix.
     */
    private final int longestCompressedSuffix;

    /**
     * Length of the shortest compressed suffix.
     */
    private final int shortestCompressedSuffix;

    /**
     * Length of the longest uncompressed suffix.
     */
    private final int longestUncompressedSuffix;

    /**
     * Length of the shortest uncompressed suffix longer than the empty string.
     */
    private final int shortestUncompressedSuffix;

    /**
     * The format's default extension.
     */
    private final String defaultExtension;

    /**
     * Sets up the utility with a map of known compressed to uncompressed suffix mappings and the default extension of the format.
     * <p>
     * The map is copied in its iteration order. When several compressed suffixes map to the same non-empty uncompressed suffix, the one that comes first in
     * that iteration order is the one used when compressing.
     * </p>
     *
     * @param uncompressSuffix Map from common file name suffixes of compressed files to the corresponding suffixes of uncompressed files. For example: from
     *                         ".tgz" to ".tar". This map also contains format-specific suffixes like ".gz" and "-z". These suffixes are mapped to the empty
     *                         string, as they should simply be removed from the file name when the file is uncompressed.
     * @param defaultExtension the format's default extension like ".gz"
     * @throws NullPointerException if {@code uncompressSuffix} is {@code null} or contains a {@code null} key or value
     */
    public FileNameUtil(final Map<String, String> uncompressSuffix, final String defaultExtension) {
        Objects.requireNonNull(uncompressSuffix, "uncompressSuffix");
        // Defensive, order-preserving copy: the cached lengths and the reverse map below must never get out of sync with the lookup table.
        final Map<String, String> forward = new LinkedHashMap<>(uncompressSuffix);
        final Map<String, String> reverse = new HashMap<>();
        int lc = Integer.MIN_VALUE;
        int sc = Integer.MAX_VALUE;
        int lu = Integer.MIN_VALUE;
        int su = Integer.MAX_VALUE;
        for (final Map.Entry<String, String> entry : forward.entrySet()) {
            final String compressed = entry.getKey();
            final int cl = compressed.length();
            lc = Math.max(lc, cl);
            sc = Math.min(sc, cl);

            final String uncompressed = entry.getValue();
            final int ul = uncompressed.length();
            // Empty values mean "just strip the suffix" and have no reverse mapping.
            if (ul > 0) {
                // First mapping wins.
                reverse.putIfAbsent(uncompressed, compressed);
                lu = Math.max(lu, ul);
                su = Math.min(su, ul);
            }
        }
        this.uncompressSuffix = Collections.unmodifiableMap(forward);
        this.compressSuffix = Collections.unmodifiableMap(reverse);
        this.longestCompressedSuffix = lc;
        this.longestUncompressedSuffix = lu;
        this.shortestCompressedSuffix = sc;
        this.shortestUncompressedSuffix = su;
        this.defaultExtension = defaultExtension;
    }

    /**
     * Maps the given file name to the name that the file should have after compression. Common file types with custom suffixes for compressed versions are
     * automatically detected and correctly mapped. For example the name "package.tar" is mapped to "package.tgz". If no custom mapping is applicable, then the
     * format's default extension is appended to the file name.
     *
     * @param fileName name of a file
     * @return name of the corresponding compressed file
     * @throws NullPointerException if {@code fileName} is {@code null}
     * @deprecated Use {@link #getCompressedFileName(String)}.
     */
    @Deprecated
    public String getCompressedFilename(final String fileName) {
        return getCompressedFileName(fileName);
    }

    /**
     * Maps the given file name to the name that the file should have after compression. Common file types with custom suffixes for compressed versions are
     * automatically detected and correctly mapped. For example the name "package.tar" is mapped to "package.tgz". If no custom mapping is applicable, then the
     * format's default extension is appended to the file name.
     * <p>
     * Suffixes are matched case-insensitively; the part of the name in front of the suffix is returned unchanged.
     * </p>
     *
     * @param fileName name of a file
     * @return name of the corresponding compressed file
     * @throws NullPointerException if {@code fileName} is {@code null}
     * @since 1.25.0
     */
    public String getCompressedFileName(final String fileName) {
        Objects.requireNonNull(fileName, "fileName");
        final int suffixLength = matchingSuffixLength(fileName, compressSuffix, shortestUncompressedSuffix, longestUncompressedSuffix);
        if (suffixLength < 0) {
            // No custom suffix found, just append the default
            return fileName + defaultExtension;
        }
        return fileName.substring(0, fileName.length() - suffixLength) + compressSuffix.get(lowerCaseTail(fileName, suffixLength));
    }

    /**
     * Maps the given name of a compressed file to the name that the file should have after uncompression. Commonly used file type specific suffixes like ".tgz"
     * or ".svgz" are automatically detected and correctly mapped. For example the name "package.tgz" is mapped to "package.tar". And any file name with a
     * generic format suffix (like ".gz") is mapped to a name without that suffix. If no format suffix is detected, then the file name is returned unmapped.
     *
     * @param fileName name of a file
     * @return name of the corresponding uncompressed file
     * @throws NullPointerException if {@code fileName} is {@code null}
     * @deprecated Use {@link #getUncompressedFileName(String)}.
     */
    @Deprecated
    public String getUncompressedFilename(final String fileName) {
        return getUncompressedFileName(fileName);
    }

    /**
     * Maps the given name of a compressed file to the name that the file should have after uncompression. Commonly used file type specific suffixes like ".tgz"
     * or ".svgz" are automatically detected and correctly mapped. For example the name "package.tgz" is mapped to "package.tar". And any file name with a
     * generic format suffix (like ".gz") is mapped to a name without that suffix. If no format suffix is detected, then the file name is returned unmapped.
     * <p>
     * Suffixes are matched case-insensitively; the part of the name in front of the suffix is returned unchanged.
     * </p>
     *
     * @param fileName name of a file
     * @return name of the corresponding uncompressed file
     * @throws NullPointerException if {@code fileName} is {@code null}
     * @since 1.25.0
     */
    public String getUncompressedFileName(final String fileName) {
        Objects.requireNonNull(fileName, "fileName");
        final int suffixLength = matchingSuffixLength(fileName, uncompressSuffix, shortestCompressedSuffix, longestCompressedSuffix);
        if (suffixLength < 0) {
            return fileName;
        }
        return fileName.substring(0, fileName.length() - suffixLength) + uncompressSuffix.get(lowerCaseTail(fileName, suffixLength));
    }

    /**
     * Detects common format suffixes in the given file name.
     *
     * @param fileName name of a file
     * @return {@code true} if the file name has a common format suffix, {@code false} otherwise
     * @throws NullPointerException if {@code fileName} is {@code null}
     * @deprecated Use {@link #isCompressedFileName(String)}.
     */
    @Deprecated
    public boolean isCompressedFilename(final String fileName) {
        return isCompressedFileName(fileName);
    }

    /**
     * Detects common format suffixes in the given file name.
     * <p>
     * The check is case-insensitive. A name that consists of a suffix only (for example ".gz") is not considered compressed.
     * </p>
     *
     * @param fileName name of a file
     * @return {@code true} if the file name has a common format suffix, {@code false} otherwise
     * @throws NullPointerException if {@code fileName} is {@code null}
     * @since 1.25.0
     */
    public boolean isCompressedFileName(final String fileName) {
        Objects.requireNonNull(fileName, "fileName");
        return matchingSuffixLength(fileName, uncompressSuffix, shortestCompressedSuffix, longestCompressedSuffix) >= 0;
    }
}