001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *   https://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019
020package org.apache.commons.compress.compressors.gzip;
021
022import java.io.IOException;
023import java.util.ArrayList;
024import java.util.Arrays;
025import java.util.Collections;
026import java.util.Iterator;
027import java.util.List;
028import java.util.Objects;
029
030import org.apache.commons.compress.compressors.gzip.ExtraField.SubField;
031
/**
 * If the {@code FLG.FEXTRA} bit is set, an "extra field" is present in the header, with total length XLEN bytes.
 *
 * <pre>
 * +---+---+=================================+
 * | XLEN  |...XLEN bytes of "extra field"...| (more...)
 * +---+---+=================================+
 * </pre>
 *
 * This class represents the extra field payload (excluding the XLEN 2 bytes). The ExtraField payload consists of a series of subfields, each of the form:
 *
 * <pre>
 * +---+---+---+---+==================================+
 * |SI1|SI2|  LEN  |... LEN bytes of subfield data ...|
 * +---+---+---+---+==================================+
 * </pre>
 *
 * This class does not expose the internal subfields list to prevent adding subfields without total extra length validation. The class is iterable, but
 * its iterator is unmodifiable.
 *
 * @see <a href="https://datatracker.ietf.org/doc/html/rfc1952">RFC 1952 GZIP File Format Specification</a>
 * @since 1.28.0
 */
public final class ExtraField implements Iterable<ExtraField.SubField> {

    /**
     * If the {@code FLG.FEXTRA} bit is set, an "extra field" is present in the header, with total length XLEN bytes. It consists of a series of subfields, each
     * of the form:
     *
     * <pre>
     * +---+---+---+---+==================================+
     * |SI1|SI2|  LEN  |... LEN bytes of subfield data ...|
     * +---+---+---+---+==================================+
     * </pre>
     * <p>
     * The reserved IDs are:
     * </p>
     *
     * <pre>
     * SI1         SI2         Data
     * ----------  ----------  ----
     * 0x41 ('A')  0x70 ('P')  Apollo file type information
     * </pre>
     * <p>
     * Subfield IDs with {@code SI2 = 0} are reserved for future use.
     * </p>
     * <p>
     * LEN gives the length of the subfield data, excluding the 4 initial bytes.
     * </p>
     *
     * @see <a href="https://datatracker.ietf.org/doc/html/rfc1952">RFC 1952 GZIP File Format Specification</a>
     */
    public static final class SubField {

        /** First byte of the subfield ID. */
        private final byte si1;

        /** Second byte of the subfield ID. */
        private final byte si2;

        /** Subfield data, excluding the 4 byte subfield header. */
        private final byte[] payload;

        /**
         * Constructs a new instance. The payload array is stored as given, without copying.
         *
         * @param si1     first byte of the subfield ID.
         * @param si2     second byte of the subfield ID.
         * @param payload the subfield data.
         */
        SubField(final byte si1, final byte si2, final byte[] payload) {
            this.si1 = si1;
            this.si2 = si2;
            this.payload = payload;
        }

        /**
         * Tests whether the given object is a {@code SubField} with the same two ID bytes and the same payload content.
         *
         * @param obj the object to compare with.
         * @return true if both subfields have equal IDs and payload bytes.
         */
        @Override
        public boolean equals(final Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null) {
                return false;
            }
            if (getClass() != obj.getClass()) {
                return false;
            }
            final SubField other = (SubField) obj;
            return Arrays.equals(payload, other.payload) && si1 == other.si1 && si2 == other.si2;
        }

        /**
         * The 2 character ISO-8859-1 string made from the si1 and si2 bytes of the sub field id.
         *
         * @return Two character ID.
         */
        public String getId() {
            // Mask with 0xff so that bytes >= 0x80 map to chars U+0080..U+00FF instead of being sign-extended.
            return String.valueOf(new char[] { (char) (si1 & 0xff), (char) (si2 & 0xff) });
        }

        /**
         * The subfield payload. The returned array is the internal array of this subfield, not a copy.
         *
         * @return The payload.
         */
        public byte[] getPayload() {
            return payload;
        }

        /**
         * Computes a hash code from the payload content and the two ID bytes.
         *
         * @return the hash code.
         */
        @Override
        public int hashCode() {
            final int prime = 31;
            int result = 1;
            result = prime * result + Arrays.hashCode(payload);
            result = prime * result + Objects.hash(si1, si2);
            return result;
        }
    }

    /** Maximum encoded size of the extra field: XLEN is an unsigned 16-bit value. */
    private static final int MAX_SIZE = 0xFFFF;

    /** Encoded size of a subfield header: SI1, SI2 and the 2 byte LEN. */
    private static final int SUBFIELD_HEADER_SIZE = 4;

    private static final byte[] ZERO_BYTES = {};

    /**
     * Parses an encoded extra field (without the 2 XLEN bytes) into its subfields.
     *
     * @param bytes the encoded extra field, may be {@code null}.
     * @return the parsed extra field, or {@code null} if {@code bytes} is {@code null}.
     * @throws IOException if {@code bytes} is longer than 65535 bytes, if a subfield declares a length exceeding the remaining bytes, or if trailing bytes
     *                     are left that cannot form a subfield.
     */
    static ExtraField fromBytes(final byte[] bytes) throws IOException {
        if (bytes == null) {
            return null;
        }
        // Enforce the same limit as addSubField(): anything larger cannot be described by the 16-bit XLEN.
        if (bytes.length > MAX_SIZE) {
            throw new IOException("Extra field too big: " + bytes.length + " > " + MAX_SIZE);
        }
        final ExtraField extra = new ExtraField();
        int pos = 0;
        while (pos <= bytes.length - SUBFIELD_HEADER_SIZE) {
            final byte si1 = bytes[pos++];
            final byte si2 = bytes[pos++];
            // LEN is stored little endian.
            final int lo = bytes[pos++] & 0xff;
            final int hi = bytes[pos++] & 0xff;
            final int sublen = lo | hi << 8;
            if (sublen > bytes.length - pos) {
                throw new IOException("Extra subfield length exceeds remaining bytes in extra: " + sublen + " > " + (bytes.length - pos));
            }
            final byte[] payload = Arrays.copyOfRange(bytes, pos, pos + sublen);
            pos += sublen;
            extra.subFields.add(new SubField(si1, si2, payload));
            extra.totalSize = pos;
        }
        if (pos < bytes.length) {
            throw new IOException("" + (bytes.length - pos) + " remaining bytes not used to parse an extra subfield.");
        }
        return extra;
    }

    /** The subfields, in insertion order. */
    private final List<SubField> subFields = new ArrayList<>();

    /** The encoded size in bytes of all subfields, including their 4 byte headers. */
    private int totalSize;

    /**
     * Constructs a new instance.
     */
    public ExtraField() {
    }

    /**
     * Append a subfield by a 2-chars ISO-8859-1 string. The char at index 0 and 1 are respectively si1 and si2 (subfield id 1 and 2).
     * <p>
     * The payload array is stored by reference, not copied.
     * </p>
     *
     * @param id      The subfield ID.
     * @param payload The subfield payload.
     * @return this instance.
     * @throws NullPointerException     if {@code id} is {@code null}.
     * @throws NullPointerException     if {@code payload} is {@code null}.
     * @throws IllegalArgumentException if the subfield ID is not 2 characters long or contains a character outside of ISO-8859-1.
     * @throws IOException              if appending this subfield would exceed the max size 65535 of the extra header.
     */
    public ExtraField addSubField(final String id, final byte[] payload) throws IOException {
        Objects.requireNonNull(id, "id");
        Objects.requireNonNull(payload, "payload");
        if (id.length() != 2) {
            throw new IllegalArgumentException("Subfield id must be a 2 character ISO-8859-1 string.");
        }
        final char si1 = id.charAt(0);
        final char si2 = id.charAt(1);
        if ((si1 & 0xff00) != 0 || (si2 & 0xff00) != 0) {
            throw new IllegalArgumentException("Subfield id must be a 2 character ISO-8859-1 string.");
        }
        final SubField f = new SubField((byte) (si1 & 0xff), (byte) (si2 & 0xff), payload);
        // Compare against the remaining capacity rather than summing the sizes, so a huge payload cannot overflow int and slip through.
        if (payload.length > MAX_SIZE - SUBFIELD_HEADER_SIZE - totalSize) {
            throw new IOException("Extra subfield '" + f.getId() + "' too big (extras total size is already at " + totalSize + ")");
        }
        subFields.add(f);
        totalSize += SUBFIELD_HEADER_SIZE + payload.length;
        return this;
    }

    /**
     * Removes all subfields from this instance.
     */
    public void clear() {
        subFields.clear();
        totalSize = 0;
    }

    /**
     * Tests whether the given object is an {@code ExtraField} with equal subfields in the same order.
     *
     * @param obj the object to compare with.
     * @return true if both instances hold equal subfield lists and encoded sizes.
     */
    @Override
    public boolean equals(final Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null) {
            return false;
        }
        if (getClass() != obj.getClass()) {
            return false;
        }
        final ExtraField other = (ExtraField) obj;
        return Objects.equals(subFields, other.subFields) && totalSize == other.totalSize;
    }

    /**
     * Finds the first subfield that matched the id if found, null otherwise.
     *
     * @param id The ID to find.
     * @return The first SubField that matched or null.
     */
    public SubField findFirstSubField(final String id) {
        return subFields.stream().filter(f -> f.getId().equals(id)).findFirst().orElse(null);
    }

    /**
     * Gets the size in bytes of the encoded extra field. This does not include its own 16 bits size when embedded in the gzip header. For N sub fields,
     * the total is all subfields payloads bytes + 4N.
     *
     * @return the bytes count of this extra payload when encoded.
     */
    public int getEncodedSize() {
        return totalSize;
    }

    /**
     * Gets the subfield at the given index.
     *
     * @param index index of the element to return.
     * @return the subfield at the specified position in this list.
     * @throws IndexOutOfBoundsException if the index is out of range ({@code index &lt; 0 || index &gt;= size()}).
     */
    public SubField getSubField(final int index) {
        return subFields.get(index);
    }

    /**
     * Computes a hash code from the subfield list and the encoded size.
     *
     * @return the hash code.
     */
    @Override
    public int hashCode() {
        return Objects.hash(subFields, totalSize);
    }

    /**
     * Tests if this extra field has no subfields.
     *
     * @return true if there are no subfields, false otherwise.
     */
    public boolean isEmpty() {
        return subFields.isEmpty();
    }

    /**
     * Returns an unmodifiable iterator over the elements in the SubField list in proper sequence.
     *
     * @return an unmodifiable naturally ordered iterator over the SubField elements.
     */
    @Override
    public Iterator<SubField> iterator() {
        return Collections.unmodifiableList(subFields).iterator();
    }

    /**
     * Gets the count of subfields currently in this extra field.
     *
     * @return the count of subfields contained in this instance.
     */
    public int size() {
        return subFields.size();
    }

    /**
     * Encodes all subfields, in order, into a new byte array of {@link #getEncodedSize()} bytes. The 2 XLEN bytes are not included.
     *
     * @return the encoded extra field, an empty array if there are no subfields.
     */
    byte[] toByteArray() {
        if (subFields.isEmpty()) {
            return ZERO_BYTES;
        }
        final byte[] ba = new byte[totalSize];
        int pos = 0;
        for (final SubField f : subFields) {
            ba[pos++] = f.si1;
            ba[pos++] = f.si2;
            ba[pos++] = (byte) (f.payload.length & 0xff); // little endian expected
            ba[pos++] = (byte) (f.payload.length >>> 8);
            System.arraycopy(f.payload, 0, ba, pos, f.payload.length);
            pos += f.payload.length;
        }
        return ba;
    }

}