FileNameUtil.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one
  3.  * or more contributor license agreements.  See the NOTICE file
  4.  * distributed with this work for additional information
  5.  * regarding copyright ownership.  The ASF licenses this file
  6.  * to you under the Apache License, Version 2.0 (the
  7.  * "License"); you may not use this file except in compliance
  8.  * with the License.  You may obtain a copy of the License at
  9.  *
  10.  * http://www.apache.org/licenses/LICENSE-2.0
  11.  *
  12.  * Unless required by applicable law or agreed to in writing,
  13.  * software distributed under the License is distributed on an
  14.  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15.  * KIND, either express or implied.  See the License for the
  16.  * specific language governing permissions and limitations
  17.  * under the License.
  18.  */
  19. package org.apache.commons.compress.compressors;

  20. import java.util.Collections;
  21. import java.util.HashMap;
  22. import java.util.Locale;
  23. import java.util.Map;

  24. /**
  25.  * File name mapping code for the compression formats.
  26.  *
  27.  * @ThreadSafe
  28.  * @since 1.4
  29.  */
  public class FileNameUtil {

      /**
       * Map from common file name suffixes to the suffixes that identify compressed versions of those file types. For example: from ".tar" to ".tgz".
       * <p>
       * Populated only by the constructor and never modified afterwards.
       * </p>
       */
      private final Map<String, String> compressSuffix = new HashMap<>();

      /**
       * Map from common file name suffixes of compressed files to the corresponding suffixes of uncompressed files. For example: from ".tgz" to ".tar".
       * <p>
       * This map also contains format-specific suffixes like ".gz" and "-z". These suffixes are mapped to the empty string, as they should simply be removed from
       * the file name when the file is uncompressed.
       * </p>
       * <p>
       * This is an unmodifiable private snapshot of the map handed to the constructor.
       * </p>
       */
      private final Map<String, String> uncompressSuffix;

      /**
       * Length of the longest compressed suffix.
       */
      private final int longestCompressedSuffix;

      /**
       * Length of the shortest compressed suffix.
       */
      private final int shortestCompressedSuffix;

      /**
       * Length of the longest uncompressed suffix.
       */
      private final int longestUncompressedSuffix;

      /**
       * Length of the shortest uncompressed suffix longer than the empty string.
       */
      private final int shortestUncompressedSuffix;

      /**
       * The format's default extension.
       */
      private final String defaultExtension;

      /**
       * Sets up the utility with a map of known compressed to uncompressed suffix mappings and the default extension of the format.
       * <p>
       * The entries of the given map are copied, so later changes to the caller's map do not affect this instance. File names are converted to lower case
       * (using {@link Locale#ROOT}) before they are matched, so the suffixes in the map need to be in lower case to be found. If several compressed suffixes map
       * to the same non-empty uncompressed suffix, the first one encountered in the map's iteration order is used when compressing.
       * </p>
       *
       * @param uncompressSuffix Map from common file name suffixes of compressed files to the corresponding suffixes of uncompressed files. For example: from
       *                         ".tgz" to ".tar". This map also contains format-specific suffixes like ".gz" and "-z". These suffixes are mapped to the empty
       *                         string, as they should simply be removed from the file name when the file is uncompressed.
       *
       * @param defaultExtension the format's default extension like ".gz"
       */
      public FileNameUtil(final Map<String, String> uncompressSuffix, final String defaultExtension) {
          // Take a snapshot instead of wrapping the caller's map: an unmodifiable *view* would still
          // reflect later changes made by the caller and leave the cached lengths below stale.
          final Map<String, String> snapshot = new HashMap<>();
          int lc = Integer.MIN_VALUE;
          int sc = Integer.MAX_VALUE;
          int lu = Integer.MIN_VALUE;
          int su = Integer.MAX_VALUE;
          // Iterate the caller's map (not the snapshot) so that "first mapping wins" follows the
          // iteration order the caller chose.
          for (final Map.Entry<String, String> ent : uncompressSuffix.entrySet()) {
              final String compressed = ent.getKey();
              final String uncompressed = ent.getValue();
              snapshot.put(compressed, uncompressed);

              final int cl = compressed.length();
              lc = Math.max(lc, cl);
              sc = Math.min(sc, cl);

              final int ul = uncompressed.length();
              // Empty values mean "just strip the suffix"; they have no reverse mapping.
              if (ul > 0) {
                  compressSuffix.computeIfAbsent(uncompressed, k -> compressed);
                  lu = Math.max(lu, ul);
                  su = Math.min(su, ul);
              }
          }
          this.uncompressSuffix = Collections.unmodifiableMap(snapshot);
          // For an empty map the "shortest" values stay at Integer.MAX_VALUE, so the lookup loops
          // below simply never execute.
          longestCompressedSuffix = lc;
          longestUncompressedSuffix = lu;
          shortestCompressedSuffix = sc;
          shortestUncompressedSuffix = su;
          this.defaultExtension = defaultExtension;
      }

      /**
       * Maps the given file name to the name that the file should have after compression. Common file types with custom suffixes for compressed versions are
       * automatically detected and correctly mapped. For example the name "package.tar" is mapped to "package.tgz". If no custom mapping is applicable, then the
       * format's default extension is appended to the file name.
       *
       * @param fileName name of a file
       * @return name of the corresponding compressed file
       * @deprecated Use {@link #getCompressedFileName(String)}.
       */
      @Deprecated
      public String getCompressedFilename(final String fileName) {
          return getCompressedFileName(fileName);
      }

      /**
       * Maps the given file name to the name that the file should have after compression. Common file types with custom suffixes for compressed versions are
       * automatically detected and correctly mapped. For example the name "package.tar" is mapped to "package.tgz". If no custom mapping is applicable, then the
       * format's default extension is appended to the file name.
       * <p>
       * Matching is case-insensitive; the part of the name in front of the suffix keeps its original case. A name that consists of nothing but a known suffix is
       * not treated as a match.
       * </p>
       *
       * @param fileName name of a file
       * @return name of the corresponding compressed file
       * @since 1.25.0
       */
      public String getCompressedFileName(final String fileName) {
          final String lower = fileName.toLowerCase(Locale.ROOT);
          final int n = lower.length();
          // i < n: at least one character has to remain in front of the suffix.
          for (int i = shortestUncompressedSuffix; i <= longestUncompressedSuffix && i < n; i++) {
              final String suffix = compressSuffix.get(lower.substring(n - i));
              if (suffix != null) {
                  return fileName.substring(0, n - i) + suffix;
              }
          }
          // No custom suffix found, just append the default
          return fileName + defaultExtension;
      }

      /**
       * Maps the given name of a compressed file to the name that the file should have after uncompression. Commonly used file type specific suffixes like ".tgz"
       * or ".svgz" are automatically detected and correctly mapped. For example the name "package.tgz" is mapped to "package.tar". And any file names with the
       * generic ".gz" suffix (or any other generic gzip suffix) is mapped to a name without that suffix. If no format suffix is detected, then the file name is
       * returned unmapped.
       *
       * @param fileName name of a file
       * @return name of the corresponding uncompressed file
       * @deprecated Use {@link #getUncompressedFileName(String)}.
       */
      @Deprecated
      public String getUncompressedFilename(final String fileName) {
          return getUncompressedFileName(fileName);
      }

      /**
       * Maps the given name of a compressed file to the name that the file should have after uncompression. Commonly used file type specific suffixes like ".tgz"
       * or ".svgz" are automatically detected and correctly mapped. For example the name "package.tgz" is mapped to "package.tar". And any file names with the
       * generic ".gz" suffix (or any other generic gzip suffix) is mapped to a name without that suffix. If no format suffix is detected, then the file name is
       * returned unmapped.
       * <p>
       * Matching is case-insensitive; the part of the name in front of the suffix keeps its original case. A name that consists of nothing but a known suffix is
       * returned unmapped.
       * </p>
       *
       * @param fileName name of a file
       * @return name of the corresponding uncompressed file
       * @since 1.25.0
       */
      public String getUncompressedFileName(final String fileName) {
          final String lower = fileName.toLowerCase(Locale.ROOT);
          final int n = lower.length();
          // i < n: at least one character has to remain in front of the suffix.
          for (int i = shortestCompressedSuffix; i <= longestCompressedSuffix && i < n; i++) {
              final String suffix = uncompressSuffix.get(lower.substring(n - i));
              if (suffix != null) {
                  return fileName.substring(0, n - i) + suffix;
              }
          }
          return fileName;
      }

      /**
       * Detects common format suffixes in the given file name.
       *
       * @param fileName name of a file
       * @return {@code true} if the file name has a common format suffix, {@code false} otherwise
       * @deprecated Use {@link #isCompressedFileName(String)}.
       */
      @Deprecated
      public boolean isCompressedFilename(final String fileName) {
          return isCompressedFileName(fileName);
      }

      /**
       * Detects common format suffixes in the given file name.
       * <p>
       * Matching is case-insensitive. A name that consists of nothing but a known suffix is not considered compressed.
       * </p>
       *
       * @param fileName name of a file
       * @return {@code true} if the file name has a common format suffix, {@code false} otherwise
       * @since 1.25.0
       */
      public boolean isCompressedFileName(final String fileName) {
          final String lower = fileName.toLowerCase(Locale.ROOT);
          final int n = lower.length();
          // i < n: at least one character has to remain in front of the suffix.
          for (int i = shortestCompressedSuffix; i <= longestCompressedSuffix && i < n; i++) {
              if (uncompressSuffix.containsKey(lower.substring(n - i))) {
                  return true;
              }
          }
          return false;
      }
  }