View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   https://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  
20  package org.apache.commons.compress.compressors.gzip;
21  
22  import java.io.IOException;
23  import java.util.ArrayList;
24  import java.util.Arrays;
25  import java.util.Collections;
26  import java.util.Iterator;
27  import java.util.List;
28  import java.util.Objects;
29  
30  import org.apache.commons.compress.compressors.gzip.ExtraField.SubField;
31  
/**
 * Models the payload of the optional gzip header "extra field".
 * <p>
 * If the {@code FLG.FEXTRA} bit is set, an "extra field" is present in the header, with total length XLEN bytes.
 * </p>
 *
 * <pre>
 * +---+---+=================================+
 * | XLEN  |...XLEN bytes of "extra field"...| (more...)
 * +---+---+=================================+
 * </pre>
 * <p>
 * This class represents the extra field payload (excluding the XLEN 2 bytes). The payload consists of a series of subfields, each of the form:
 * </p>
 *
 * <pre>
 * +---+---+---+---+==================================+
 * |SI1|SI2|  LEN  |... LEN bytes of subfield data ...|
 * +---+---+---+---+==================================+
 * </pre>
 * <p>
 * This class does not expose the internal subfields list to prevent adding subfields without total extra length validation. The class is iterable, but the
 * iterator it hands out is unmodifiable.
 * </p>
 *
 * @see <a href="https://datatracker.ietf.org/doc/html/rfc1952">RFC 1952 GZIP File Format Specification</a>
 * @since 1.28.0
 */
// The nested type is qualified so that the class header resolves without relying on a single-type import of SubField.
public final class ExtraField implements Iterable<ExtraField.SubField> {

    /**
     * A single subfield of the gzip "extra field".
     * <p>
     * If the {@code FLG.FEXTRA} bit is set, an "extra field" is present in the header, with total length XLEN bytes. It consists of a series of subfields, each
     * of the form:
     * </p>
     *
     * <pre>
     * +---+---+---+---+==================================+
     * |SI1|SI2|  LEN  |... LEN bytes of subfield data ...|
     * +---+---+---+---+==================================+
     * </pre>
     * <p>
     * The reserved IDs are:
     * </p>
     *
     * <pre>
     * SI1         SI2         Data
     * ----------  ----------  ----
     * 0x41 ('A')  0x70 ('P')  Apollo file type information
     * </pre>
     * <p>
     * Subfield IDs with {@code SI2 = 0} are reserved for future use.
     * </p>
     * <p>
     * LEN gives the length of the subfield data, excluding the 4 initial bytes.
     * </p>
     * <p>
     * Two subfields are equal when both ID bytes and the payload contents are equal.
     * </p>
     *
     * @see <a href="https://datatracker.ietf.org/doc/html/rfc1952">RFC 1952 GZIP File Format Specification</a>
     */
    public static final class SubField {

        /** First subfield ID byte (SI1). */
        private final byte si1;

        /** Second subfield ID byte (SI2). */
        private final byte si2;

        /** Subfield data; the array reference is stored as given, without copying. */
        private final byte[] payload;

        /**
         * Constructs a subfield from its raw parts.
         *
         * @param si1     first ID byte.
         * @param si2     second ID byte.
         * @param payload subfield data; stored by reference.
         */
        SubField(final byte si1, final byte si2, final byte[] payload) {
            this.si1 = si1;
            this.si2 = si2;
            this.payload = payload;
        }

        @Override
        public boolean equals(final Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null || getClass() != obj.getClass()) {
                return false;
            }
            final SubField other = (SubField) obj;
            return si1 == other.si1 && si2 == other.si2 && Arrays.equals(payload, other.payload);
        }

        /**
         * Gets the 2 character ISO-8859-1 string made from the si1 and si2 bytes of the subfield ID.
         *
         * @return Two character ID.
         */
        public String getId() {
            // Mask to treat each byte as unsigned so values >= 0x80 map to U+0080..U+00FF.
            return String.valueOf(new char[] { (char) (si1 & 0xff), (char) (si2 & 0xff) });
        }

        /**
         * Gets the subfield payload.
         * <p>
         * The array held by this subfield is returned directly, without copying.
         * </p>
         *
         * @return The payload.
         */
        public byte[] getPayload() {
            return payload;
        }

        @Override
        public int hashCode() {
            final int prime = 31;
            int result = 1;
            result = prime * result + Arrays.hashCode(payload);
            result = prime * result + Objects.hash(si1, si2);
            return result;
        }
    }

    /** Largest value representable by the 16-bit XLEN header field. */
    private static final int MAX_SIZE = 0xFFFF;

    /** Encoded bytes preceding each subfield payload: SI1, SI2 and the 2-byte LEN. */
    private static final int SUBFIELD_HEADER_SIZE = 4;

    private static final byte[] ZERO_BYTES = {};

    /**
     * Parses an encoded extra field payload (without the leading XLEN bytes) into its subfields.
     *
     * @param bytes the encoded payload, may be {@code null}.
     * @return the parsed instance, or {@code null} if {@code bytes} is {@code null}.
     * @throws IOException if a subfield declares more data than remains, or if trailing bytes do not form a complete subfield.
     */
    static ExtraField fromBytes(final byte[] bytes) throws IOException {
        if (bytes == null) {
            return null;
        }
        final ExtraField extra = new ExtraField();
        int pos = 0;
        // Keep going while at least one complete subfield header (SI1, SI2, LEN) remains.
        while (pos <= bytes.length - SUBFIELD_HEADER_SIZE) {
            final byte si1 = bytes[pos++];
            final byte si2 = bytes[pos++];
            // LEN is stored little endian.
            final int sublen = bytes[pos++] & 0xff | (bytes[pos++] & 0xff) << 8;
            final int remaining = bytes.length - pos;
            if (sublen > remaining) {
                throw new IOException("Extra subfield length exceeds remaining bytes in extra: " + sublen + " > " + remaining);
            }
            extra.subFields.add(new SubField(si1, si2, Arrays.copyOfRange(bytes, pos, pos + sublen)));
            pos += sublen;
            extra.totalSize = pos;
        }
        if (pos < bytes.length) {
            throw new IOException("" + (bytes.length - pos) + " remaining bytes not used to parse an extra subfield.");
        }
        return extra;
    }

    private final List<SubField> subFields = new ArrayList<>();

    /** Encoded size in bytes of all subfields, headers included. */
    private int totalSize;

    /**
     * Constructs a new instance.
     */
    public ExtraField() {
    }

    /**
     * Appends a subfield identified by a 2 character ISO-8859-1 string. The chars at index 0 and 1 are respectively si1 and si2 (subfield id 1 and 2).
     * <p>
     * The payload array is stored by reference, it is not copied.
     * </p>
     *
     * @param id      The subfield ID.
     * @param payload The subfield payload.
     * @return this instance.
     * @throws NullPointerException     if {@code id} is {@code null}.
     * @throws NullPointerException     if {@code payload} is {@code null}.
     * @throws IllegalArgumentException if {@code id} is not exactly 2 characters, or contains a character outside ISO-8859-1.
     * @throws IOException              if appending this subfield would exceed the max size 65535 of the extra header.
     */
    public ExtraField addSubField(final String id, final byte[] payload) throws IOException {
        Objects.requireNonNull(id, "id");
        Objects.requireNonNull(payload, "payload");
        if (id.length() != 2) {
            throw new IllegalArgumentException("Subfield id must be a 2 character ISO-8859-1 string.");
        }
        final char si1 = id.charAt(0);
        final char si2 = id.charAt(1);
        if ((si1 & 0xff00) != 0 || (si2 & 0xff00) != 0) {
            throw new IllegalArgumentException("Subfield id must be a 2 character ISO-8859-1 string.");
        }
        final SubField f = new SubField((byte) (si1 & 0xff), (byte) (si2 & 0xff), payload);
        // Compare the payload against the remaining room instead of summing sizes, so a huge payload length cannot overflow int.
        if (payload.length > MAX_SIZE - SUBFIELD_HEADER_SIZE - totalSize) {
            throw new IOException("Extra subfield '" + f.getId() + "' too big (extras total size is already at " + totalSize + ")");
        }
        subFields.add(f);
        totalSize += SUBFIELD_HEADER_SIZE + payload.length;
        return this;
    }

    /**
     * Removes all subfields from this instance.
     */
    public void clear() {
        subFields.clear();
        totalSize = 0;
    }

    @Override
    public boolean equals(final Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        final ExtraField other = (ExtraField) obj;
        return totalSize == other.totalSize && Objects.equals(subFields, other.subFields);
    }

    /**
     * Finds the first subfield that matches the given ID.
     *
     * @param id The ID to find.
     * @return The first SubField that matched, or {@code null} if none matched.
     */
    public SubField findFirstSubField(final String id) {
        return subFields.stream().filter(f -> f.getId().equals(id)).findFirst().orElse(null);
    }

    /**
     * Gets the size in bytes of the encoded extra field. This does not include its own 16 bits size when embedded in the gzip header. For N subfields, the
     * total is all subfields payloads bytes + 4N.
     *
     * @return the bytes count of this extra payload when encoded.
     */
    public int getEncodedSize() {
        return totalSize;
    }

    /**
     * Gets the subfield at the given index.
     *
     * @param index index of the element to return.
     * @return the subfield at the specified position in this list.
     * @throws IndexOutOfBoundsException if the index is out of range ({@code index &lt; 0 || index &gt;= size()}).
     */
    public SubField getSubField(final int index) {
        return subFields.get(index);
    }

    @Override
    public int hashCode() {
        return Objects.hash(subFields, totalSize);
    }

    /**
     * Tests if this extra field has no subfields.
     *
     * @return true if there are no subfields, false otherwise.
     */
    public boolean isEmpty() {
        return subFields.isEmpty();
    }

    /**
     * Returns an unmodifiable iterator over the elements in the SubField list in proper sequence.
     *
     * @return an unmodifiable naturally ordered iterator over the SubField elements.
     */
    @Override
    public Iterator<SubField> iterator() {
        return Collections.unmodifiableList(subFields).iterator();
    }

    /**
     * Gets the count of subfields currently in this extra field.
     *
     * @return the count of subfields contained in this instance.
     */
    public int size() {
        return subFields.size();
    }

    /**
     * Encodes all subfields, in insertion order, into the extra field payload (without the leading XLEN bytes).
     *
     * @return the encoded payload; an empty array if there are no subfields.
     */
    byte[] toByteArray() {
        if (subFields.isEmpty()) {
            return ZERO_BYTES;
        }
        final byte[] ba = new byte[totalSize];
        int pos = 0;
        for (final SubField f : subFields) {
            final int len = f.payload.length;
            ba[pos++] = f.si1;
            ba[pos++] = f.si2;
            ba[pos++] = (byte) (len & 0xff); // little endian expected
            ba[pos++] = (byte) (len >>> 8);
            System.arraycopy(f.payload, 0, ba, pos, len);
            pos += len;
        }
        return ba;
    }

}