001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * https://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019 020package org.apache.commons.compress.compressors.gzip; 021 022import java.io.IOException; 023import java.util.ArrayList; 024import java.util.Arrays; 025import java.util.Collections; 026import java.util.Iterator; 027import java.util.List; 028import java.util.Objects; 029 030import org.apache.commons.compress.compressors.gzip.ExtraField.SubField; 031 032/** 033 * If the {@code FLG.FEXTRA} bit is set, an "extra field" is present in the header, with total length XLEN bytes. 034 * 035 * <pre> 036 * +---+---+=================================+ 037 * | XLEN |...XLEN bytes of "extra field"...| (more...) 038 * +---+---+=================================+ 039 * </pre> 040 * 041 * This class represents the extra field payload (excluding the XLEN 2 bytes). The ExtraField payload consists of a series of subfields, each of the form: 042 * 043 * <pre> 044 * +---+---+---+---+==================================+ 045 * |SI1|SI2| LEN |... LEN bytes of subfield data ...| 046 * +---+---+---+---+==================================+ 047 * </pre> 048 * 049 * This class does not expose the internal subfields list to prevent adding subfields without total extra length validation. The class is iterable, but this 050 * iterator is immutable. 051 * 052 * @see <a href="https://datatracker.ietf.org/doc/html/rfc1952">RFC 1952 GZIP File Format Specification</a> 053 * @since 1.28.0 054 */ 055public final class ExtraField implements Iterable<SubField> { 056 057 /** 058 * If the {@code FLG.FEXTRA} bit is set, an "extra field" is present in the header, with total length XLEN bytes. It consists of a series of subfields, each 059 * of the form: 060 * 061 * <pre> 062 * +---+---+---+---+==================================+ 063 * |SI1|SI2| LEN |... LEN bytes of subfield data ...| 064 * +---+---+---+---+==================================+ 065 * </pre> 066 * <p> 067 * The reserved IDs are: 068 * </p> 069 * 070 * <pre> 071 * SI1 SI2 Data 072 * ---------- ---------- ---- 073 * 0x41 ('A') 0x70 ('P') Apollo file type information 074 * </pre> 075 * <p> 076 * Subfield IDs with {@code SI2 = 0} are reserved for future use. 077 * </p> 078 * <p> 079 * LEN gives the length of the subfield data, excluding the 4 initial bytes. 080 * </p> 081 * 082 * @see <a href="https://datatracker.ietf.org/doc/html/rfc1952">RFC 1952 GZIP File Format Specification</a> 083 */ 084 public static final class SubField { 085 086 private final byte si1; 087 private final byte si2; 088 private final byte[] payload; 089 090 SubField(final byte si1, final byte si2, final byte[] payload) { 091 this.si1 = si1; 092 this.si2 = si2; 093 this.payload = payload; 094 } 095 096 @Override 097 public boolean equals(final Object obj) { 098 if (this == obj) { 099 return true; 100 } 101 if (obj == null) { 102 return false; 103 } 104 if (getClass() != obj.getClass()) { 105 return false; 106 } 107 final SubField other = (SubField) obj; 108 return Arrays.equals(payload, other.payload) && si1 == other.si1 && si2 == other.si2; 109 } 110 111 /** 112 * The 2 character ISO-8859-1 string made from the si1 and si2 bytes of the sub field id. 113 * 114 * @return Two character ID. 115 */ 116 public String getId() { 117 return String.valueOf(new char[] { (char) (si1 & 0xff), (char) (si2 & 0xff) }); 118 } 119 120 /** 121 * The subfield payload. 122 * 123 * @return The payload. 124 */ 125 public byte[] getPayload() { 126 return payload; 127 } 128 129 @Override 130 public int hashCode() { 131 final int prime = 31; 132 int result = 1; 133 result = prime * result + Arrays.hashCode(payload); 134 result = prime * result + Objects.hash(si1, si2); 135 return result; 136 } 137 } 138 139 private static final int MAX_SIZE = 0xFFFF; 140 141 private static final byte[] ZERO_BYTES = {}; 142 143 static ExtraField fromBytes(final byte[] bytes) throws IOException { 144 if (bytes == null) { 145 return null; 146 } 147 final ExtraField extra = new ExtraField(); 148 int pos = 0; 149 while (pos <= bytes.length - 4) { 150 final byte si1 = bytes[pos++]; 151 final byte si2 = bytes[pos++]; 152 final int sublen = bytes[pos++] & 0xff | (bytes[pos++] & 0xff) << 8; 153 if (sublen > bytes.length - pos) { 154 throw new IOException("Extra subfield lenght exceeds remaining bytes in extra: " + sublen + " > " + (bytes.length - pos)); 155 } 156 final byte[] payload = new byte[sublen]; 157 System.arraycopy(bytes, pos, payload, 0, sublen); 158 pos += sublen; 159 extra.subFields.add(new SubField(si1, si2, payload)); 160 extra.totalSize = pos; 161 } 162 if (pos < bytes.length) { 163 throw new IOException("" + (bytes.length - pos) + " remaining bytes not used to parse an extra subfield."); 164 } 165 return extra; 166 } 167 168 private final List<SubField> subFields = new ArrayList<>(); 169 170 private int totalSize; 171 172 /** 173 * Constructs a new instance. 174 */ 175 public ExtraField() { 176 } 177 178 /** 179 * Append a subfield by a 2-chars ISO-8859-1 string. The char at index 0 and 1 are respectively si1 and si2 (subfield id 1 and 2). 180 * 181 * @param id The subfield ID. 182 * @param payload The subfield payload. 183 * @return this instance. 184 * @throws NullPointerException if {@code id} is {@code null}. 185 * @throws NullPointerException if {@code payload} is {@code null}. 186 * @throws IllegalArgumentException if the subfield is not 2 characters or the payload is null 187 * @throws IOException if appending this subfield would exceed the max size 65535 of the extra header. 188 */ 189 public ExtraField addSubField(final String id, final byte[] payload) throws IOException { 190 Objects.requireNonNull(id, "payload"); 191 Objects.requireNonNull(payload, "payload"); 192 if (id.length() != 2) { 193 throw new IllegalArgumentException("Subfield id must be a 2 character ISO-8859-1 string."); 194 } 195 final char si1 = id.charAt(0); 196 final char si2 = id.charAt(1); 197 if ((si1 & 0xff00) != 0 || (si2 & 0xff00) != 0) { 198 throw new IllegalArgumentException("Subfield id must be a 2 character ISO-8859-1 string."); 199 } 200 final SubField f = new SubField((byte) (si1 & 0xff), (byte) (si2 & 0xff), payload); 201 final int len = 4 + payload.length; 202 if (totalSize + len > MAX_SIZE) { 203 throw new IOException("Extra subfield '" + f.getId() + "' too big (extras total size is already at " + totalSize + ")"); 204 } 205 subFields.add(f); 206 totalSize += len; 207 return this; 208 } 209 210 /** 211 * Removes all subfields from this instance. 212 */ 213 public void clear() { 214 subFields.clear(); 215 totalSize = 0; 216 } 217 218 @Override 219 public boolean equals(final Object obj) { 220 if (this == obj) { 221 return true; 222 } 223 if (obj == null) { 224 return false; 225 } 226 if (getClass() != obj.getClass()) { 227 return false; 228 } 229 final ExtraField other = (ExtraField) obj; 230 return Objects.equals(subFields, other.subFields) && totalSize == other.totalSize; 231 } 232 233 /** 234 * Finds the first subfield that matched the id if found, null otherwise. 235 * 236 * @param id The ID to find. 237 * @return The first SubField that matched or null. 238 */ 239 public SubField findFirstSubField(final String id) { 240 return subFields.stream().filter(f -> f.getId().equals(id)).findFirst().orElse(null); 241 } 242 243 /** 244 * Gets the size in bytes of the encoded extra field. This does not include its own 16 bits size when embeded in the gzip header. For N sub fields, 245 * the total is all subfields payloads bytes + 4N. 246 * 247 * @return the bytes count of this extra payload when encoded. 248 */ 249 public int getEncodedSize() { 250 return totalSize; 251 } 252 253 /** 254 * Gets the subfield at the given index. 255 * 256 * @param index index of the element to return. 257 * @return the subfield at the specified position in this list. 258 * @throws IndexOutOfBoundsException if the index is out of range ({@code index < 0 || index >= size()}). 259 */ 260 public SubField getSubField(final int index) { 261 return subFields.get(index); 262 } 263 264 @Override 265 public int hashCode() { 266 return Objects.hash(subFields, totalSize); 267 } 268 269 /** 270 * Tests is this extra field has no subfields. 271 * 272 * @return true if there are no subfields, false otherwise. 273 */ 274 public boolean isEmpty() { 275 return subFields.isEmpty(); 276 } 277 278 /** 279 * Returns an unmodifiable iterator over the elements in the SubField list in proper sequence. 280 * 281 * @return an unmodifiable naturally ordered iterator over the SubField elements. 282 */ 283 @Override 284 public Iterator<SubField> iterator() { 285 return Collections.unmodifiableList(subFields).iterator(); 286 } 287 288 /** 289 * Gets the count of subfields currently in in this extra field. 290 * 291 * @return the count of subfields contained in this instance. 292 */ 293 public int size() { 294 return subFields.size(); 295 } 296 297 byte[] toByteArray() { 298 if (subFields.isEmpty()) { 299 return ZERO_BYTES; 300 } 301 final byte[] ba = new byte[totalSize]; 302 int pos = 0; 303 for (final SubField f : subFields) { 304 ba[pos++] = f.si1; 305 ba[pos++] = f.si2; 306 ba[pos++] = (byte) (f.payload.length & 0xff); // little endian expected 307 ba[pos++] = (byte) (f.payload.length >>> 8); 308 System.arraycopy(f.payload, 0, ba, pos, f.payload.length); 309 pos += f.payload.length; 310 } 311 return ba; 312 } 313 314}