View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.imaging.formats.jpeg.iptc;
19  
20  import static org.apache.commons.imaging.common.BinaryFunctions.read2Bytes;
21  import static org.apache.commons.imaging.common.BinaryFunctions.read4Bytes;
22  import static org.apache.commons.imaging.common.BinaryFunctions.readByte;
23  import static org.apache.commons.imaging.common.BinaryFunctions.readBytes;
24  import static org.apache.commons.imaging.common.BinaryFunctions.slice;
25  import static org.apache.commons.imaging.common.BinaryFunctions.startsWith;
26  
27  import java.io.ByteArrayInputStream;
28  import java.io.ByteArrayOutputStream;
29  import java.io.IOException;
30  import java.io.InputStream;
31  import java.nio.ByteOrder;
32  import java.util.ArrayList;
33  import java.util.Collections;
34  import java.util.Comparator;
35  import java.util.List;
36  import java.util.Map;
37  
38  import org.apache.commons.imaging.ImageReadException;
39  import org.apache.commons.imaging.ImageWriteException;
40  import org.apache.commons.imaging.ImagingConstants;
41  import org.apache.commons.imaging.common.BinaryFileParser;
42  import org.apache.commons.imaging.common.BinaryOutputStream;
43  import org.apache.commons.imaging.common.ByteConversions;
44  import org.apache.commons.imaging.formats.jpeg.JpegConstants;
45  import org.apache.commons.imaging.util.Debug;
46  
47  public class IptcParser extends BinaryFileParser {
48      private static final ByteOrder APP13_BYTE_ORDER = ByteOrder.BIG_ENDIAN;
49  
50      public IptcParser() {
51          setByteOrder(ByteOrder.BIG_ENDIAN);
52      }
53  
54      public boolean isPhotoshopJpegSegment(final byte[] segmentData) {
55          if (!startsWith(segmentData,
56                  JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING)) {
57              return false;
58          }
59  
60          final int index = JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING.size();
61          return (index + 4) <= segmentData.length
62                  && ByteConversions.toInt(segmentData, index, APP13_BYTE_ORDER) == JpegConstants.CONST_8BIM;
63      }
64  
65      /*
66       * In practice, App13 segments are only used for Photoshop/IPTC metadata.
67       * However, we should not treat App13 signatures without Photoshop's
68       * signature as Photoshop/IPTC segments.
69       * 
70       * A Photoshop/IPTC App13 segment begins with the Photoshop Identification
71       * string.
72       * 
73       * There follows 0-N blocks (Photoshop calls them "Image Resource Blocks").
74       * 
75       * Each block has the following structure:
76       * 
77       * 1. 4-byte type. This is always "8BIM" for blocks in a Photoshop App13
78       * segment. 2. 2-byte id. IPTC data is stored in blocks with id 0x0404, aka.
79       * IPTC_NAA_RECORD_IMAGE_RESOURCE_ID 3. Block name as a Pascal String. This
80       * is padded to have an even length. 4. 4-byte size (in bytes). 5. Block
81       * data. This is also padded to have an even length.
82       * 
83       * The block data consists of a 0-N records. A record has the following
84       * structure:
85       * 
86       * 1. 2-byte prefix. The value is always 0x1C02 2. 1-byte record type. The
87       * record types are documented by the IPTC. See IptcConstants. 3. 2-byte
88       * record size (in bytes). 4. Record data, "record size" bytes long.
89       * 
90       * Record data (unlike block data) is NOT padded to have an even length.
91       * 
92       * Record data, for IPTC record, should always be ISO-8859-1. But according
93       * to SANSELAN-33, this isn't always the case.
94       * 
95       * The exception is the first record in the block, which must always be a
96       * record version record, whose value is a two-byte number; the value is
97       * 0x02.
98       * 
99       * Some IPTC blocks are missing this first "record version" record, so we
100      * don't require it.
101      */
102     public PhotoshopApp13Data parsePhotoshopSegment(final byte[] bytes, final Map<String, Object> params)
103             throws ImageReadException, IOException {
104         final boolean strict =  params != null && Boolean.TRUE.equals(params.get(ImagingConstants.PARAM_KEY_STRICT));
105         final boolean verbose =  params != null && Boolean.TRUE.equals(params.get(ImagingConstants.PARAM_KEY_VERBOSE));
106 
107         return parsePhotoshopSegment(bytes, verbose, strict);
108     }
109 
110     public PhotoshopApp13Data parsePhotoshopSegment(final byte[] bytes,
111             final boolean verbose, final boolean strict) throws ImageReadException,
112             IOException {
113         final List<IptcRecord> records = new ArrayList<>();
114 
115         final List<IptcBlock> blocks = parseAllBlocks(bytes, verbose, strict);
116 
117         for (final IptcBlock block : blocks) {
118             // Ignore everything but IPTC data.
119             if (!block.isIPTCBlock()) {
120                 continue;
121             }
122 
123             records.addAll(parseIPTCBlock(block.blockData, verbose));
124         }
125 
126         return new PhotoshopApp13Data(records, blocks);
127     }
128 
129     protected List<IptcRecord> parseIPTCBlock(final byte[] bytes, final boolean verbose)
130             throws IOException {
131         final List<IptcRecord> elements = new ArrayList<>();
132 
133         int index = 0;
134         // Integer recordVersion = null;
135         while (index + 1 < bytes.length) {
136             final int tagMarker = 0xff & bytes[index++];
137             if (verbose) {
138                 Debug.debug("tagMarker: " + tagMarker + " (0x" + Integer.toHexString(tagMarker) + ")");
139             }
140 
141             if (tagMarker != IptcConstants.IPTC_RECORD_TAG_MARKER) {
142                 if (verbose) {
143                     System.out.println("Unexpected record tag marker in IPTC data.");
144                 }
145                 return elements;
146             }
147 
148             final int recordNumber = 0xff & bytes[index++];
149             if (verbose) {
150                 Debug.debug("recordNumber: " + recordNumber + " (0x" + Integer.toHexString(recordNumber) + ")");
151             }
152 
153             // int recordPrefix = convertByteArrayToShort("recordPrefix", index,
154             // bytes);
155             // if (verbose)
156             // Debug.debug("recordPrefix", recordPrefix + " (0x"
157             // + Integer.toHexString(recordPrefix) + ")");
158             // index += 2;
159             //
160             // if (recordPrefix != IPTC_RECORD_PREFIX)
161             // {
162             // if (verbose)
163             // System.out
164             // .println("Unexpected record prefix in IPTC data!");
165             // return elements;
166             // }
167 
168             // throw new ImageReadException(
169             // "Unexpected record prefix in IPTC data.");
170 
171             final int recordType = 0xff & bytes[index];
172             if (verbose) {
173                 Debug.debug("recordType: " + recordType + " (0x" + Integer.toHexString(recordType) + ")");
174             }
175             index++;
176 
177             final int recordSize = ByteConversions.toUInt16(bytes, index, getByteOrder());
178             index += 2;
179 
180             final boolean extendedDataset = recordSize > IptcConstants.IPTC_NON_EXTENDED_RECORD_MAXIMUM_SIZE;
181             final int dataFieldCountLength = recordSize & 0x7fff;
182             if (extendedDataset && verbose) {
183                 Debug.debug("extendedDataset. dataFieldCountLength: "
184                         + dataFieldCountLength);
185             }
186             if (extendedDataset) {
187                 // ignore extended dataset and everything after.
188                 return elements;
189             }
190 
191             final byte[] recordData = slice(bytes, index, recordSize);
192             index += recordSize;
193 
194             // Debug.debug("recordSize", recordSize + " (0x"
195             // + Integer.toHexString(recordSize) + ")");
196 
197             if (recordNumber != IptcConstants.IPTC_APPLICATION_2_RECORD_NUMBER) {
198                 continue;
199             }
200 
201             if (recordType == 0) {
202                 if (verbose) {
203                     System.out.println("ignore record version record! "
204                             + elements.size());
205                 }
206                 // ignore "record version" record;
207                 continue;
208             }
209             // if (recordVersion == null)
210             // {
211             // // The first record in a JPEG/Photoshop IPTC block must be
212             // // the record version.
213             // if (recordType != 0)
214             // throw new ImageReadException("Missing record version: "
215             // + recordType);
216             // recordVersion = new Integer(convertByteArrayToShort(
217             // "recordNumber", recordData));
218             //
219             // if (recordSize != 2)
220             // throw new ImageReadException(
221             // "Invalid record version record size: " + recordSize);
222             //
223             // // JPEG/Photoshop IPTC metadata is always in Record version
224             // // 2
225             // if (recordVersion.intValue() != 2)
226             // throw new ImageReadException(
227             // "Invalid IPTC record version: " + recordVersion);
228             //
229             // // Debug.debug("recordVersion", recordVersion);
230             // continue;
231             // }
232 
233             final String value = new String(recordData, "ISO-8859-1");
234 
235             final IptcType iptcType = IptcTypeLookup.getIptcType(recordType);
236 
237             // Debug.debug("iptcType", iptcType);
238             // debugByteArray("iptcData", iptcData);
239             // Debug.debug();
240 
241             // if (recordType == IPTC_TYPE_CREDIT.type
242             // || recordType == IPTC_TYPE_OBJECT_NAME.type)
243             // {
244             // this.debugByteArray("recordData", recordData);
245             // Debug.debug("index", IPTC_TYPE_CREDIT.name);
246             // }
247 
248             final IptcRecord element = new IptcRecord(iptcType, value);
249             elements.add(element);
250         }
251 
252         return elements;
253     }
254 
255     protected List<IptcBlock> parseAllBlocks(final byte[] bytes, final boolean verbose,
256             final boolean strict) throws ImageReadException, IOException {
257         final List<IptcBlock> blocks = new ArrayList<>();
258 
259         try (InputStream bis = new ByteArrayInputStream(bytes)) {
260 
261             // Note that these are unsigned quantities. Name is always an even
262             // number of bytes (including the 1st byte, which is the size.)
263     
264             final byte[] idString = readBytes("", bis, 
265                     JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING.size(),
266                     "App13 Segment missing identification string");
267             if (!JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING.equals(idString)) {
268                 throw new ImageReadException("Not a Photoshop App13 Segment");
269             }
270     
271             // int index = PHOTOSHOP_IDENTIFICATION_STRING.length;
272     
273             while (true) {
274                 final int imageResourceBlockSignature;
275                 try {
276                     imageResourceBlockSignature = read4Bytes("", bis, 
277                             "Image Resource Block missing identification string", APP13_BYTE_ORDER);
278                 } catch (final IOException ioEx) {
279                     break;
280                 }
281                 if (imageResourceBlockSignature != JpegConstants.CONST_8BIM) {
282                     throw new ImageReadException(
283                             "Invalid Image Resource Block Signature");
284                 }
285     
286                 final int blockType = read2Bytes("", bis, "Image Resource Block missing type", APP13_BYTE_ORDER);
287                 if (verbose) {
288                     Debug.debug("blockType: " + blockType + " (0x" + Integer.toHexString(blockType) + ")");
289                 }
290     
291                 final int blockNameLength = readByte("Name length", bis, "Image Resource Block missing name length");
292                 if (verbose && blockNameLength > 0) {
293                     Debug.debug("blockNameLength: " + blockNameLength + " (0x" 
294                             + Integer.toHexString(blockNameLength) + ")");
295                 }
296                 byte[] blockNameBytes;
297                 if (blockNameLength == 0) {
298                     readByte("Block name bytes", bis, "Image Resource Block has invalid name");
299                     blockNameBytes = new byte[0];
300                 } else {
301                     try {
302                         blockNameBytes = readBytes("", bis, blockNameLength,
303                                 "Invalid Image Resource Block name");
304                     } catch (final IOException ioEx) {
305                         if (strict) {
306                             throw ioEx;
307                         }
308                         break;
309                     }
310     
311                     if (blockNameLength % 2 == 0) {
312                         readByte("Padding byte", bis, "Image Resource Block missing padding byte");
313                     }
314                 }
315     
316                 final int blockSize = read4Bytes("", bis, "Image Resource Block missing size", APP13_BYTE_ORDER);
317                 if (verbose) {
318                     Debug.debug("blockSize: " + blockSize + " (0x" + Integer.toHexString(blockSize) + ")");
319                 }
320     
321                 /*
322                  * doesn't catch cases where blocksize is invalid but is still less
323                  * than bytes.length but will at least prevent OutOfMemory errors
324                  */
325                 if (blockSize > bytes.length) {
326                     throw new ImageReadException("Invalid Block Size : " + blockSize + " > " + bytes.length);
327                 }
328     
329                 final byte[] blockData;
330                 try {
331                     blockData = readBytes("", bis, blockSize, "Invalid Image Resource Block data");
332                 } catch (final IOException ioEx) {
333                     if (strict) {
334                         throw ioEx;
335                     }
336                     break;
337                 }
338     
339                 blocks.add(new IptcBlock(blockType, blockNameBytes, blockData));
340     
341                 if ((blockSize % 2) != 0) {
342                     readByte("Padding byte", bis, "Image Resource Block missing padding byte");
343                 }
344             }
345     
346             return blocks;
347         }
348     }
349 
350     // private void writeIPTCRecord(BinaryOutputStream bos, )
351 
352     public byte[] writePhotoshopApp13Segment(final PhotoshopApp13Data data)
353             throws IOException, ImageWriteException {
354         final ByteArrayOutputStream os = new ByteArrayOutputStream();
355         final BinaryOutputStream bos = new BinaryOutputStream(os);
356 
357         JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING.writeTo(bos);
358 
359         final List<IptcBlock> blocks = data.getRawBlocks();
360         for (final IptcBlock block : blocks) {
361             bos.write4Bytes(JpegConstants.CONST_8BIM);
362 
363             if (block.blockType < 0 || block.blockType > 0xffff) {
364                 throw new ImageWriteException("Invalid IPTC block type.");
365             }
366             bos.write2Bytes(block.blockType);
367 
368             if (block.blockNameBytes.length > 255) {
369                 throw new ImageWriteException("IPTC block name is too long: "
370                         + block.blockNameBytes.length);
371             }
372             bos.write(block.blockNameBytes.length);
373             bos.write(block.blockNameBytes);
374             if (block.blockNameBytes.length % 2 == 0) {
375                 bos.write(0); // pad to even size, including length byte.
376             }
377 
378             if (block.blockData.length > IptcConstants.IPTC_NON_EXTENDED_RECORD_MAXIMUM_SIZE) {
379                 throw new ImageWriteException("IPTC block data is too long: "
380                         + block.blockData.length);
381             }
382             bos.write4Bytes(block.blockData.length);
383             bos.write(block.blockData);
384             if (block.blockData.length % 2 == 1) {
385                 bos.write(0); // pad to even size
386             }
387 
388         }
389 
390         bos.flush();
391         return os.toByteArray();
392     }
393 
394     public byte[] writeIPTCBlock(List<IptcRecord> elements)
395             throws ImageWriteException, IOException {
396         byte[] blockData;
397         final ByteArrayOutputStream baos = new ByteArrayOutputStream();
398         try (BinaryOutputStream bos = new BinaryOutputStream(baos, getByteOrder())) {
399     
400             // first, right record version record
401             bos.write(IptcConstants.IPTC_RECORD_TAG_MARKER);
402             bos.write(IptcConstants.IPTC_APPLICATION_2_RECORD_NUMBER);
403             bos.write(IptcTypes.RECORD_VERSION.type); // record version record
404                                                       // type.
405             bos.write2Bytes(2); // record version record size
406             bos.write2Bytes(2); // record version value
407     
408             // make a copy of the list.
409             elements = new ArrayList<>(elements);
410     
411             // sort the list. Records must be in numerical order.
412             final Comparator<IptcRecord> comparator = new Comparator<IptcRecord>() {
413                 @Override
414                 public int compare(final IptcRecord e1, final IptcRecord e2) {
415                     return e2.iptcType.getType() - e1.iptcType.getType();
416                 }
417             };
418             Collections.sort(elements, comparator);
419             // TODO: make sure order right
420     
421             // write the list.
422             for (final IptcRecord element : elements) {
423                 if (element.iptcType == IptcTypes.RECORD_VERSION) {
424                     continue; // ignore
425                 }
426 
427                 bos.write(IptcConstants.IPTC_RECORD_TAG_MARKER);
428                 bos.write(IptcConstants.IPTC_APPLICATION_2_RECORD_NUMBER);
429                 if (element.iptcType.getType() < 0
430                         || element.iptcType.getType() > 0xff) {
431                     throw new ImageWriteException("Invalid record type: "
432                             + element.iptcType.getType());
433                 }
434                 bos.write(element.iptcType.getType());
435 
436                 final byte[] recordData = element.getValue().getBytes("ISO-8859-1");
437                 if (!new String(recordData, "ISO-8859-1").equals(element.getValue())) {
438                     throw new ImageWriteException(
439                             "Invalid record value, not ISO-8859-1");
440                 }
441 
442                 bos.write2Bytes(recordData.length);
443                 bos.write(recordData);
444             }
445         }
446 
447         blockData = baos.toByteArray();
448 
449         return blockData;
450     }
451 
452 }