1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * https://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20 package org.apache.commons.compress.compressors.gzip;
21
22 import java.io.IOException;
23 import java.util.ArrayList;
24 import java.util.Arrays;
25 import java.util.Collections;
26 import java.util.Iterator;
27 import java.util.List;
28 import java.util.Objects;
29
30 import org.apache.commons.compress.compressors.gzip.ExtraField.SubField;
31
/**
 * The payload of the gzip header "extra field".
 * <p>
 * If the {@code FLG.FEXTRA} bit is set, an "extra field" is present in the header, with total length XLEN bytes.
 * </p>
 *
 * <pre>
 * +---+---+=================================+
 * | XLEN  |...XLEN bytes of "extra field"...| (more...)
 * +---+---+=================================+
 * </pre>
 * <p>
 * This class represents the extra field payload (excluding the 2 XLEN bytes). The payload consists of a series of subfields, each of the form:
 * </p>
 *
 * <pre>
 * +---+---+---+---+==================================+
 * |SI1|SI2|  LEN  |... LEN bytes of subfield data ...|
 * +---+---+---+---+==================================+
 * </pre>
 * <p>
 * This class does not expose its internal subfield list, so that subfields cannot be added without validating the total encoded length. Instances are
 * iterable, but the iterator is unmodifiable.
 * </p>
 *
 * @see <a href="https://datatracker.ietf.org/doc/html/rfc1952">RFC 1952 GZIP File Format Specification</a>
 * @since 1.28.0
 */
public final class ExtraField implements Iterable<ExtraField.SubField> {

    /**
     * A single subfield of the gzip header "extra field".
     * <p>
     * If the {@code FLG.FEXTRA} bit is set, an "extra field" is present in the header, with total length XLEN bytes. It consists of a series of subfields,
     * each of the form:
     * </p>
     *
     * <pre>
     * +---+---+---+---+==================================+
     * |SI1|SI2|  LEN  |... LEN bytes of subfield data ...|
     * +---+---+---+---+==================================+
     * </pre>
     * <p>
     * The reserved IDs are:
     * </p>
     *
     * <pre>
     * SI1         SI2         Data
     * ----------  ----------  ----
     * 0x41 ('A')  0x70 ('P')  Apollo file type information
     * </pre>
     * <p>
     * Subfield IDs with {@code SI2 = 0} are reserved for future use.
     * </p>
     * <p>
     * LEN gives the length of the subfield data, excluding the 4 initial bytes.
     * </p>
     *
     * @see <a href="https://datatracker.ietf.org/doc/html/rfc1952">RFC 1952 GZIP File Format Specification</a>
     */
    public static final class SubField {

        private final byte si1;
        private final byte si2;
        private final byte[] payload;

        /**
         * Constructs a subfield. The payload array is stored as given, it is not copied.
         *
         * @param si1     first byte of the subfield ID.
         * @param si2     second byte of the subfield ID.
         * @param payload the subfield data.
         */
        SubField(final byte si1, final byte si2, final byte[] payload) {
            this.si1 = si1;
            this.si2 = si2;
            this.payload = payload;
        }

        @Override
        public boolean equals(final Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null || getClass() != obj.getClass()) {
                return false;
            }
            final SubField other = (SubField) obj;
            return si1 == other.si1 && si2 == other.si2 && Arrays.equals(payload, other.payload);
        }

        /**
         * Gets the 2 character ISO-8859-1 string made from the si1 and si2 bytes of the subfield ID.
         *
         * @return the two character ID.
         */
        public String getId() {
            // Mask to treat each byte as unsigned so values 0x80-0xFF map to U+0080-U+00FF.
            return String.valueOf(new char[] { (char) (si1 & 0xff), (char) (si2 & 0xff) });
        }

        /**
         * Gets the subfield payload.
         * <p>
         * The array held by this subfield is returned directly, not a copy.
         * </p>
         *
         * @return the payload.
         */
        public byte[] getPayload() {
            return payload;
        }

        @Override
        public int hashCode() {
            final int prime = 31;
            int result = 1;
            result = prime * result + Arrays.hashCode(payload);
            result = prime * result + Objects.hash(si1, si2);
            return result;
        }
    }

    /** Largest encoded size of the extra field: XLEN is an unsigned 16-bit value. */
    private static final int MAX_SIZE = 0xFFFF;

    /** Size in bytes of a subfield header: SI1, SI2 and the 2 LEN bytes. */
    private static final int SUBFIELD_HEADER_SIZE = 4;

    private static final byte[] ZERO_BYTES = {};

    /**
     * Parses an encoded extra field payload (without the leading XLEN bytes) into its subfields.
     *
     * @param bytes the encoded payload, may be {@code null}.
     * @return the parsed instance, or {@code null} if {@code bytes} is {@code null}.
     * @throws IOException if a subfield declares more data than remains, or if trailing bytes are left that cannot form a subfield header.
     */
    static ExtraField fromBytes(final byte[] bytes) throws IOException {
        if (bytes == null) {
            return null;
        }
        final ExtraField extra = new ExtraField();
        int pos = 0;
        // Keep going while at least a full subfield header remains.
        while (pos <= bytes.length - SUBFIELD_HEADER_SIZE) {
            final byte si1 = bytes[pos++];
            final byte si2 = bytes[pos++];
            // LEN is stored little endian.
            final int sublen = bytes[pos++] & 0xff | (bytes[pos++] & 0xff) << 8;
            final int remaining = bytes.length - pos;
            if (sublen > remaining) {
                throw new IOException("Extra subfield length exceeds remaining bytes in extra: " + sublen + " > " + remaining);
            }
            final byte[] payload = new byte[sublen];
            System.arraycopy(bytes, pos, payload, 0, sublen);
            pos += sublen;
            extra.subFields.add(new SubField(si1, si2, payload));
            extra.totalSize = pos;
        }
        if (pos < bytes.length) {
            throw new IOException((bytes.length - pos) + " remaining bytes not used to parse an extra subfield.");
        }
        return extra;
    }

    private final List<SubField> subFields = new ArrayList<>();

    /** Encoded size in bytes of all subfields, headers included. */
    private int totalSize;

    /**
     * Constructs a new instance.
     */
    public ExtraField() {
    }

    /**
     * Appends a subfield identified by a 2 character ISO-8859-1 string. The chars at index 0 and 1 are respectively si1 and si2 (subfield ID 1 and 2).
     * <p>
     * The payload array is stored as given, it is not copied.
     * </p>
     *
     * @param id      The subfield ID.
     * @param payload The subfield payload.
     * @return this instance.
     * @throws NullPointerException     if {@code id} is {@code null}.
     * @throws NullPointerException     if {@code payload} is {@code null}.
     * @throws IllegalArgumentException if {@code id} is not exactly 2 characters long or contains a character outside of ISO-8859-1.
     * @throws IOException              if appending this subfield would exceed the max size 65535 of the extra header.
     */
    public ExtraField addSubField(final String id, final byte[] payload) throws IOException {
        Objects.requireNonNull(id, "id");
        Objects.requireNonNull(payload, "payload");
        if (id.length() != 2) {
            throw new IllegalArgumentException("Subfield id must be a 2 character ISO-8859-1 string.");
        }
        final char si1 = id.charAt(0);
        final char si2 = id.charAt(1);
        if ((si1 & 0xff00) != 0 || (si2 & 0xff00) != 0) {
            throw new IllegalArgumentException("Subfield id must be a 2 character ISO-8859-1 string.");
        }
        final SubField f = new SubField((byte) (si1 & 0xff), (byte) (si2 & 0xff), payload);
        // Same test as totalSize + 4 + payload.length > MAX_SIZE, arranged so that the int arithmetic cannot overflow for huge payloads.
        if (payload.length > MAX_SIZE - SUBFIELD_HEADER_SIZE - totalSize) {
            throw new IOException("Extra subfield '" + f.getId() + "' too big (extras total size is already at " + totalSize + ")");
        }
        subFields.add(f);
        totalSize += SUBFIELD_HEADER_SIZE + payload.length;
        return this;
    }

    /**
     * Removes all subfields from this instance.
     */
    public void clear() {
        subFields.clear();
        totalSize = 0;
    }

    @Override
    public boolean equals(final Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        final ExtraField other = (ExtraField) obj;
        return totalSize == other.totalSize && Objects.equals(subFields, other.subFields);
    }

    /**
     * Finds the first subfield whose ID equals the given ID.
     *
     * @param id The ID to find.
     * @return The first matching SubField, or {@code null} if there is none.
     */
    public SubField findFirstSubField(final String id) {
        return subFields.stream().filter(f -> f.getId().equals(id)).findFirst().orElse(null);
    }

    /**
     * Gets the size in bytes of the encoded extra field. This does not include its own 16 bit size when embedded in the gzip header. For N subfields, the
     * total is the byte count of all subfield payloads + 4N.
     *
     * @return the byte count of this extra payload when encoded.
     */
    public int getEncodedSize() {
        return totalSize;
    }

    /**
     * Gets the subfield at the given index.
     *
     * @param index index of the element to return.
     * @return the subfield at the specified position in this list.
     * @throws IndexOutOfBoundsException if the index is out of range ({@code index < 0 || index >= size()}).
     */
    public SubField getSubField(final int index) {
        return subFields.get(index);
    }

    @Override
    public int hashCode() {
        return Objects.hash(subFields, totalSize);
    }

    /**
     * Tests if this extra field has no subfields.
     *
     * @return true if there are no subfields, false otherwise.
     */
    public boolean isEmpty() {
        return subFields.isEmpty();
    }

    /**
     * Returns an unmodifiable iterator over the subfields, in the order they were added or parsed.
     *
     * @return an unmodifiable iterator over the SubField elements.
     */
    @Override
    public Iterator<SubField> iterator() {
        return Collections.unmodifiableList(subFields).iterator();
    }

    /**
     * Gets the count of subfields currently in this extra field.
     *
     * @return the count of subfields contained in this instance.
     */
    public int size() {
        return subFields.size();
    }

    /**
     * Encodes all subfields, in order, as SI1, SI2, little endian 16 bit LEN, then the payload bytes.
     *
     * @return the encoded payload without the leading XLEN bytes; a shared empty array if there are no subfields.
     */
    byte[] toByteArray() {
        if (subFields.isEmpty()) {
            return ZERO_BYTES;
        }
        final byte[] ba = new byte[totalSize];
        int pos = 0;
        for (final SubField f : subFields) {
            final int len = f.payload.length;
            ba[pos++] = f.si1;
            ba[pos++] = f.si2;
            ba[pos++] = (byte) (len & 0xff); // little endian expected
            ba[pos++] = (byte) (len >>> 8);
            System.arraycopy(f.payload, 0, ba, pos, len);
            pos += len;
        }
        return ba;
    }

}