001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.gzip;
020
021import java.io.IOException;
022import java.io.OutputStream;
023import java.net.URI;
024import java.net.URISyntaxException;
025import java.nio.ByteBuffer;
026import java.nio.ByteOrder;
027import java.nio.charset.StandardCharsets;
028import java.util.zip.CRC32;
029import java.util.zip.Deflater;
030import java.util.zip.GZIPInputStream;
031import java.util.zip.GZIPOutputStream;
032
033import org.apache.commons.compress.compressors.CompressorOutputStream;
034
035/**
036 * Compressed output stream using the gzip format. This implementation improves over the standard {@link GZIPOutputStream} class by allowing the configuration
037 * of the compression level and the header metadata (file name, comment, modification time, operating system and extra flags).
038 *
039 * @see <a href="https://tools.ietf.org/html/rfc1952">GZIP File Format Specification</a>
040 */
041public class GzipCompressorOutputStream extends CompressorOutputStream {
042
043    /** Header flag indicating a file name follows the header */
044    private static final int FNAME = 1 << 3;
045
046    /** Header flag indicating a comment follows the header */
047    private static final int FCOMMENT = 1 << 4;
048
049    /** The underlying stream */
050    private final OutputStream out;
051
052    /** Deflater used to compress the data */
053    private final Deflater deflater;
054
055    /** The buffer receiving the compressed data from the deflater */
056    private final byte[] deflateBuffer;
057
058    /** Indicates if the stream has been closed */
059    private boolean closed;
060
061    /** The checksum of the uncompressed data */
062    private final CRC32 crc = new CRC32();
063
064    /**
065     * Creates a gzip compressed output stream with the default parameters.
066     *
067     * @param out the stream to compress to
068     * @throws IOException if writing fails
069     */
070    public GzipCompressorOutputStream(final OutputStream out) throws IOException {
071        this(out, new GzipParameters());
072    }
073
074    /**
075     * Creates a gzip compressed output stream with the specified parameters.
076     *
077     * @param out        the stream to compress to
078     * @param parameters the parameters to use
079     * @throws IOException if writing fails
080     *
081     * @since 1.7
082     */
083    public GzipCompressorOutputStream(final OutputStream out, final GzipParameters parameters) throws IOException {
084        this.out = out;
085        this.deflater = new Deflater(parameters.getCompressionLevel(), true);
086        this.deflater.setStrategy(parameters.getDeflateStrategy());
087        this.deflateBuffer = new byte[parameters.getBufferSize()];
088        writeHeader(parameters);
089    }
090
091    @Override
092    public void close() throws IOException {
093        if (!closed) {
094            try {
095                finish();
096            } finally {
097                deflater.end();
098                out.close();
099                closed = true;
100            }
101        }
102    }
103
104    private void deflate() throws IOException {
105        final int length = deflater.deflate(deflateBuffer, 0, deflateBuffer.length);
106        if (length > 0) {
107            out.write(deflateBuffer, 0, length);
108        }
109    }
110
111    /**
112     * Finishes writing compressed data to the underlying stream without closing it.
113     *
114     * @since 1.7
115     * @throws IOException on error
116     */
117    public void finish() throws IOException {
118        if (!deflater.finished()) {
119            deflater.finish();
120
121            while (!deflater.finished()) {
122                deflate();
123            }
124
125            writeTrailer();
126        }
127    }
128
129    /**
130     * {@inheritDoc}
131     *
132     * @since 1.7
133     */
134    @Override
135    public void flush() throws IOException {
136        out.flush();
137    }
138
139    /**
140     * Gets the bytes encoded in the {@value GzipUtils#GZIP_ENCODING} Charset.
141     * <p>
142     * If the string cannot be encoded directly with {@value GzipUtils#GZIP_ENCODING}, then use URI-style percent encoding.
143     * </p>
144     *
145     * @param string The string to encode.
146     * @return
147     * @throws IOException
148     */
149    private byte[] getBytes(final String string) throws IOException {
150        if (GzipUtils.GZIP_ENCODING.newEncoder().canEncode(string)) {
151            return string.getBytes(GzipUtils.GZIP_ENCODING);
152        }
153        try {
154            return new URI(null, null, string, null).toASCIIString().getBytes(StandardCharsets.US_ASCII);
155        } catch (final URISyntaxException e) {
156            throw new IOException(string, e);
157        }
158    }
159
160    /**
161     * {@inheritDoc}
162     *
163     * @since 1.1
164     */
165    @Override
166    public void write(final byte[] buffer) throws IOException {
167        write(buffer, 0, buffer.length);
168    }
169
170    /**
171     * {@inheritDoc}
172     *
173     * @since 1.1
174     */
175    @Override
176    public void write(final byte[] buffer, final int offset, final int length) throws IOException {
177        if (deflater.finished()) {
178            throw new IOException("Cannot write more data, the end of the compressed data stream has been reached");
179        }
180        if (length > 0) {
181            deflater.setInput(buffer, offset, length);
182
183            while (!deflater.needsInput()) {
184                deflate();
185            }
186
187            crc.update(buffer, offset, length);
188        }
189    }
190
191    @Override
192    public void write(final int b) throws IOException {
193        write(new byte[] { (byte) (b & 0xff) }, 0, 1);
194    }
195
196    private void writeHeader(final GzipParameters parameters) throws IOException {
197        final String fileName = parameters.getFileName();
198        final String comment = parameters.getComment();
199
200        final ByteBuffer buffer = ByteBuffer.allocate(10);
201        buffer.order(ByteOrder.LITTLE_ENDIAN);
202        buffer.putShort((short) GZIPInputStream.GZIP_MAGIC);
203        buffer.put((byte) Deflater.DEFLATED); // compression method (8: deflate)
204        buffer.put((byte) ((fileName != null ? FNAME : 0) | (comment != null ? FCOMMENT : 0))); // flags
205        buffer.putInt((int) (parameters.getModificationTime() / 1000));
206
207        // extra flags
208        final int compressionLevel = parameters.getCompressionLevel();
209        if (compressionLevel == Deflater.BEST_COMPRESSION) {
210            buffer.put((byte) 2);
211        } else if (compressionLevel == Deflater.BEST_SPEED) {
212            buffer.put((byte) 4);
213        } else {
214            buffer.put((byte) 0);
215        }
216
217        buffer.put((byte) parameters.getOperatingSystem());
218
219        out.write(buffer.array());
220
221        if (fileName != null) {
222            out.write(getBytes(fileName));
223            out.write(0);
224        }
225
226        if (comment != null) {
227            out.write(getBytes(comment));
228            out.write(0);
229        }
230    }
231
232    private void writeTrailer() throws IOException {
233        final ByteBuffer buffer = ByteBuffer.allocate(8);
234        buffer.order(ByteOrder.LITTLE_ENDIAN);
235        buffer.putInt((int) crc.getValue());
236        buffer.putInt(deflater.getTotalIn());
237
238        out.write(buffer.array());
239    }
240
241}