GzipUtils.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one
  3.  * or more contributor license agreements.  See the NOTICE file
  4.  * distributed with this work for additional information
  5.  * regarding copyright ownership.  The ASF licenses this file
  6.  * to you under the Apache License, Version 2.0 (the
  7.  * "License"); you may not use this file except in compliance
  8.  * with the License.  You may obtain a copy of the License at
  9.  *
  10.  *   https://www.apache.org/licenses/LICENSE-2.0
  11.  *
  12.  * Unless required by applicable law or agreed to in writing,
  13.  * software distributed under the License is distributed on an
  14.  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15.  * KIND, either express or implied.  See the License for the
  16.  * specific language governing permissions and limitations
  17.  * under the License.
  18.  */
  19. package org.apache.commons.compress.compressors.gzip;

  20. import java.nio.charset.Charset;
  21. import java.nio.charset.StandardCharsets;
  22. import java.util.LinkedHashMap;
  23. import java.util.Map;

  24. import org.apache.commons.compress.compressors.FileNameUtil;

  25. /**
  26.  * Utility code for the GZIP compression format.
  27.  *
  28.  * @see <a href="https://datatracker.ietf.org/doc/html/rfc1952">RFC 1952 GZIP File Format Specification</a>
  29.  * @ThreadSafe
  30.  */
  31. public class GzipUtils {

  32.     /** Header flag indicating a comment follows the header. */
  33.     static final int FCOMMENT = 0x10;

  34.     /** Header flag indicating an EXTRA subfields collection follows the header. */
  35.     static final int FEXTRA = 0x04;

  36.     /** Header flag indicating a header CRC follows the header. */
  37.     static final int FHCRC = 0x02;

  38.     private static final FileNameUtil fileNameUtil;

  39.     /** Header flag indicating a file name follows the header. */
  40.     static final int FNAME = 0x08;

  41.     static final int FRESERVED = 0xE0;

  42.     /**
  43.      * Charset for file name and comments per the <a href="https://tools.ietf.org/html/rfc1952">GZIP File Format Specification</a>.
  44.      */
  45.     static final Charset GZIP_ENCODING = StandardCharsets.ISO_8859_1;

  46.     /**
  47.      * Member header ID1 (IDentification 1).
  48.      *
  49.      * See <a href="https://datatracker.ietf.org/doc/html/rfc1952#page-5">RFC1952</a> 2.3.1. Member header and trailer.
  50.      */
  51.     static final int ID1 = 31;

  52.     /**
  53.      * Member header ID2 (IDentification 2).
  54.      *
  55.      * See <a href="https://datatracker.ietf.org/doc/html/rfc1952#page-5">RFC1952</a> 2.3.1. Member header and trailer.
  56.      */
  57.     static final int ID2 = 139;

  58.     /**
  59.      * Member header XFL (eXtra FLags) when the "deflate" method (CM = 8) is set, then XFL = 2 means the compressor used maximum compression (slowest
  60.      * algorithm).
  61.      *
  62.      * See <a href="https://datatracker.ietf.org/doc/html/rfc1952#page-5">RFC1952</a> 2.3.1. Member header and trailer.
  63.      */
  64.     static final byte XFL_MAX_COMPRESSION = 2;

  65.     /**
  66.      * Member header XFL (eXtra FLags) when the "deflate" method (CM = 8) is set, then XFL = 4 means the compressor used the fastest algorithm.
  67.      *
  68.      * See <a href="https://datatracker.ietf.org/doc/html/rfc1952#page-5">RFC1952</a> 2.3.1. Member header and trailer.
  69.      */
  70.     static final byte XFL_MAX_SPEED = 4;

  71.     static final byte XFL_UNKNOWN = 0;

  72.     /**
  73.      * Using {@link LinkedHashMap} so {@code .tgz} is preferred over {@code .taz} as compressed extension of {@code .tar} as FileNameUtil will use the first one
  74.      * found.
  75.      */
  76.     static {
  77.         final Map<String, String> uncompressSuffix = new LinkedHashMap<>();
  78.         uncompressSuffix.put(".tgz", ".tar");
  79.         uncompressSuffix.put(".taz", ".tar");
  80.         uncompressSuffix.put(".svgz", ".svg");
  81.         uncompressSuffix.put(".cpgz", ".cpio");
  82.         uncompressSuffix.put(".wmz", ".wmf");
  83.         uncompressSuffix.put(".emz", ".emf");
  84.         uncompressSuffix.put(".gz", "");
  85.         uncompressSuffix.put(".z", "");
  86.         uncompressSuffix.put("-gz", "");
  87.         uncompressSuffix.put("-z", "");
  88.         uncompressSuffix.put("_z", "");
  89.         fileNameUtil = new FileNameUtil(uncompressSuffix, ".gz");
  90.     }
  91.     /**
  92.      * Maps the given file name to the name that the file should have after compression with gzip. Common file types with custom suffixes for compressed
  93.      * versions are automatically detected and correctly mapped. For example the name "package.tar" is mapped to "package.tgz". If no custom mapping is
  94.      * applicable, then the default ".gz" suffix is appended to the file name.
  95.      *
  96.      * @param fileName name of a file
  97.      * @return name of the corresponding compressed file
  98.      * @deprecated Use {@link #getCompressedFileName(String)}.
  99.      */
  100.     @Deprecated
  101.     public static String getCompressedFilename(final String fileName) {
  102.         return fileNameUtil.getCompressedFileName(fileName);
  103.     }

  104.     /**
  105.      * Maps the given file name to the name that the file should have after compression with gzip. Common file types with custom suffixes for compressed
  106.      * versions are automatically detected and correctly mapped. For example the name "package.tar" is mapped to "package.tgz". If no custom mapping is
  107.      * applicable, then the default ".gz" suffix is appended to the file name.
  108.      *
  109.      * @param fileName name of a file
  110.      * @return name of the corresponding compressed file
  111.      * @since 1.25.0
  112.      */
  113.     public static String getCompressedFileName(final String fileName) {
  114.         return fileNameUtil.getCompressedFileName(fileName);
  115.     }

  116.     /**
  117.      * Maps the given name of a gzip-compressed file to the name that the file should have after uncompression. Commonly used file type specific suffixes like
  118.      * ".tgz" or ".svgz" are automatically detected and correctly mapped. For example the name "package.tgz" is mapped to "package.tar". And any file names with
  119.      * the generic ".gz" suffix (or any other generic gzip suffix) is mapped to a name without that suffix. If no gzip suffix is detected, then the file name is
  120.      * returned unmapped.
  121.      *
  122.      * @param fileName name of a file
  123.      * @return name of the corresponding uncompressed file
  124.      * @deprecated Use {@link #getUncompressedFileName(String)}.
  125.      */
  126.     @Deprecated
  127.     public static String getUncompressedFilename(final String fileName) {
  128.         return fileNameUtil.getUncompressedFileName(fileName);
  129.     }

  130.     /**
  131.      * Maps the given name of a gzip-compressed file to the name that the file should have after uncompression. Commonly used file type specific suffixes like
  132.      * ".tgz" or ".svgz" are automatically detected and correctly mapped. For example the name "package.tgz" is mapped to "package.tar". And any file names with
  133.      * the generic ".gz" suffix (or any other generic gzip suffix) is mapped to a name without that suffix. If no gzip suffix is detected, then the file name is
  134.      * returned unmapped.
  135.      *
  136.      * @param fileName name of a file
  137.      * @return name of the corresponding uncompressed file
  138.      * @since 1.25.0
  139.      */
  140.     public static String getUncompressedFileName(final String fileName) {
  141.         return fileNameUtil.getUncompressedFileName(fileName);
  142.     }

  143.     /**
  144.      * Detects common gzip suffixes in the given file name.
  145.      *
  146.      * @param fileName name of a file
  147.      * @return {@code true} if the file name has a common gzip suffix, {@code false} otherwise
  148.      * @deprecated Use {@link #isCompressedFileName(String)}.
  149.      */
  150.     @Deprecated
  151.     public static boolean isCompressedFilename(final String fileName) {
  152.         return fileNameUtil.isCompressedFileName(fileName);
  153.     }

  154.     /**
  155.      * Detects common gzip suffixes in the given file name.
  156.      *
  157.      * @param fileName name of a file
  158.      * @return {@code true} if the file name has a common gzip suffix, {@code false} otherwise
  159.      * @since 1.25.0
  160.      */
  161.     public static boolean isCompressedFileName(final String fileName) {
  162.         return fileNameUtil.isCompressedFileName(fileName);
  163.     }

  164.     /** Private constructor to prevent instantiation of this utility class. */
  165.     private GzipUtils() {
  166.     }

  167. }