001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *   https://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors;
020
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;

import org.apache.commons.lang3.StringUtils;
026
/**
 * File name mapping code for the compression formats.
 * <p>
 * File names are lower-cased with {@link Locale#ROOT} before any suffix lookup, so the suffixes supplied to the constructor must be lower case to ever match.
 * A file name only matches a suffix when it is strictly longer than that suffix, i.e. when a non-empty base name remains.
 * </p>
 *
 * @ThreadSafe
 * @since 1.4
 */
public class FileNameUtil {

    /**
     * Map from common file name suffixes to the suffixes that identify compressed versions of those file types. For example: from ".tar" to ".tgz".
     * <p>
     * Populated only by the constructor and never modified afterwards.
     * </p>
     */
    private final Map<String, String> compressSuffix = new HashMap<>();

    /**
     * Map from common file name suffixes of compressed files to the corresponding suffixes of uncompressed files. For example: from ".tgz" to ".tar".
     * <p>
     * This map also contains format-specific suffixes like ".gz" and "-z". These suffixes are mapped to the empty string, as they should simply be removed from
     * the file name when the file is uncompressed.
     * </p>
     * <p>
     * This is an unmodifiable private snapshot of the map passed to the constructor.
     * </p>
     */
    private final Map<String, String> uncompressSuffix;

    /**
     * Length of the longest compressed suffix, or {@link Integer#MIN_VALUE} if there is none.
     */
    private final int longestCompressedSuffix;

    /**
     * Length of the shortest compressed suffix, or {@link Integer#MAX_VALUE} if there is none.
     */
    private final int shortestCompressedSuffix;

    /**
     * Length of the longest uncompressed suffix, or {@link Integer#MIN_VALUE} if there is none.
     */
    private final int longestUncompressedSuffix;

    /**
     * Length of the shortest uncompressed suffix longer than the empty string, or {@link Integer#MAX_VALUE} if there is none.
     */
    private final int shortestUncompressedSuffix;

    /**
     * The format's default extension.
     */
    private final String defaultExtension;

    /**
     * Finds the first known suffix at the end of the given file name, trying candidate suffix lengths in ascending order from {@code shortest} to
     * {@code longest}.
     * <p>
     * The file name is lower-cased with {@link Locale#ROOT} before the lookup. A candidate is only considered when it is strictly shorter than the file name.
     * </p>
     *
     * @param fileName name of a file
     * @param suffixes map whose keys are the known (lower case) suffixes
     * @param shortest length of the shortest key worth trying
     * @param longest  length of the longest key worth trying
     * @return the matching key of {@code suffixes}, or {@code null} if the file name ends with none of them
     * @throws NullPointerException if {@code fileName} is {@code null}
     */
    private static String findSuffix(final String fileName, final Map<String, String> suffixes, final int shortest, final int longest) {
        final String lower = fileName.toLowerCase(Locale.ROOT);
        final int lowerLength = lower.length();
        // Lower-casing is not always a 1:1 char mapping (for example U+0130 becomes two chars), so the lower-cased copy may be longer than the original.
        // Bound the scan by both lengths so that the caller can always safely cut the matched suffix off the ORIGINAL name.
        final int limit = Math.min(fileName.length(), lowerLength);
        for (int i = shortest; i <= longest && i < limit; i++) {
            final String candidate = lower.substring(lowerLength - i);
            if (suffixes.containsKey(candidate)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Sets up the utility with a map of known compressed to uncompressed suffix mappings and the default extension of the format.
     * <p>
     * The given map is copied, so later changes made to it by the caller do not affect this instance. When several compressed suffixes map to the same
     * non-empty uncompressed suffix, the one encountered first while iterating the given map is used for the reverse (compress) mapping.
     * </p>
     *
     * @param uncompressSuffix Map from common file name suffixes of compressed files to the corresponding suffixes of uncompressed files. For example: from
     *                         ".tgz" to ".tar". This map also contains format-specific suffixes like ".gz" and "-z". These suffixes are mapped to the empty
     *                         string, as they should simply be removed from the file name when the file is uncompressed.
     *
     * @param defaultExtension the format's default extension like ".gz"
     * @throws NullPointerException if {@code uncompressSuffix} is {@code null} or contains a {@code null} key or value
     */
    public FileNameUtil(final Map<String, String> uncompressSuffix, final String defaultExtension) {
        // Snapshot the caller's map; LinkedHashMap keeps the caller's iteration order, which decides the winner of duplicate reverse mappings below.
        final Map<String, String> snapshot = new LinkedHashMap<>(uncompressSuffix);
        this.uncompressSuffix = Collections.unmodifiableMap(snapshot);
        int lc = Integer.MIN_VALUE;
        int sc = Integer.MAX_VALUE;
        int lu = Integer.MIN_VALUE;
        int su = Integer.MAX_VALUE;
        for (final Map.Entry<String, String> ent : snapshot.entrySet()) {
            final String compressed = ent.getKey();
            final int cl = compressed.length();
            lc = Math.max(lc, cl);
            sc = Math.min(sc, cl);

            final String uncompressed = ent.getValue();
            final int ul = uncompressed.length();
            // Empty values mean "just strip the suffix"; they have no reverse mapping and do not take part in the uncompressed length bounds.
            if (ul > 0) {
                compressSuffix.putIfAbsent(uncompressed, compressed);
                lu = Math.max(lu, ul);
                su = Math.min(su, ul);
            }
        }
        longestCompressedSuffix = lc;
        longestUncompressedSuffix = lu;
        shortestCompressedSuffix = sc;
        shortestUncompressedSuffix = su;
        this.defaultExtension = defaultExtension;
    }

    /**
     * Maps the given file name to the name that the file should have after compression. Common file types with custom suffixes for compressed versions are
     * automatically detected and correctly mapped. For example the name "package.tar" is mapped to "package.tgz". If no custom mapping is applicable, then the
     * format's default extension is appended to the file name.
     *
     * @param fileName name of a file
     * @return name of the corresponding compressed file
     * @throws NullPointerException if {@code fileName} is {@code null}
     * @deprecated Use {@link #getCompressedFileName(String)}.
     */
    @Deprecated
    public String getCompressedFilename(final String fileName) {
        return getCompressedFileName(fileName);
    }

    /**
     * Maps the given file name to the name that the file should have after compression. Common file types with custom suffixes for compressed versions are
     * automatically detected and correctly mapped. For example the name "package.tar" is mapped to "package.tgz". If no custom mapping is applicable, then the
     * format's default extension is appended to the file name.
     *
     * @param fileName name of a file
     * @return name of the corresponding compressed file
     * @throws NullPointerException if {@code fileName} is {@code null}
     * @since 1.25.0
     */
    public String getCompressedFileName(final String fileName) {
        final String matched = findSuffix(fileName, compressSuffix, shortestUncompressedSuffix, longestUncompressedSuffix);
        if (matched == null) {
            // No custom suffix found, just append the default
            return fileName + defaultExtension;
        }
        // Cut relative to the original name's length, not the lower-cased copy's.
        return fileName.substring(0, fileName.length() - matched.length()) + compressSuffix.get(matched);
    }

    /**
     * Maps the given name of a compressed file to the name that the file should have after uncompression. Commonly used file type specific suffixes like ".tgz"
     * or ".svgz" are automatically detected and correctly mapped. For example the name "package.tgz" is mapped to "package.tar". And any file names with the
     * generic ".gz" suffix (or any other generic gzip suffix) is mapped to a name without that suffix. If no format suffix is detected, then the file name is
     * returned unmapped.
     *
     * @param fileName name of a file
     * @return name of the corresponding uncompressed file
     * @throws NullPointerException if {@code fileName} is {@code null}
     * @deprecated Use {@link #getUncompressedFileName(String)}.
     */
    @Deprecated
    public String getUncompressedFilename(final String fileName) {
        return getUncompressedFileName(fileName);
    }

    /**
     * Maps the given name of a compressed file to the name that the file should have after uncompression. Commonly used file type specific suffixes like ".tgz"
     * or ".svgz" are automatically detected and correctly mapped. For example the name "package.tgz" is mapped to "package.tar". And any file names with the
     * generic ".gz" suffix (or any other generic gzip suffix) is mapped to a name without that suffix. If no format suffix is detected, then the file name is
     * returned unmapped.
     *
     * @param fileName name of a file
     * @return name of the corresponding uncompressed file
     * @throws NullPointerException if {@code fileName} is {@code null}
     * @since 1.25.0
     */
    public String getUncompressedFileName(final String fileName) {
        final String matched = findSuffix(fileName, uncompressSuffix, shortestCompressedSuffix, longestCompressedSuffix);
        if (matched == null) {
            return fileName;
        }
        // Cut relative to the original name's length, not the lower-cased copy's.
        return fileName.substring(0, fileName.length() - matched.length()) + uncompressSuffix.get(matched);
    }

    /**
     * Detects common format suffixes in the given file name.
     *
     * @param fileName name of a file
     * @return {@code true} if the file name has a common format suffix, {@code false} otherwise
     * @throws NullPointerException if {@code fileName} is {@code null}
     * @deprecated Use {@link #isCompressedFileName(String)}.
     */
    @Deprecated
    public boolean isCompressedFilename(final String fileName) {
        return isCompressedFileName(fileName);
    }

    /**
     * Detects common format suffixes in the given file name.
     *
     * @param fileName name of a file
     * @return {@code true} if the file name has a common format suffix, {@code false} otherwise
     * @throws NullPointerException if {@code fileName} is {@code null}
     * @since 1.25.0
     */
    public boolean isCompressedFileName(final String fileName) {
        return findSuffix(fileName, uncompressSuffix, shortestCompressedSuffix, longestCompressedSuffix) != null;
    }
}