001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 */
017package org.apache.commons.compress.harmony.unpack200;
018
019import java.io.BufferedInputStream;
020import java.io.ByteArrayInputStream;
021import java.io.ByteArrayOutputStream;
022import java.io.DataOutputStream;
023import java.io.IOException;
024import java.io.InputStream;
025import java.io.OutputStream;
026import java.io.OutputStreamWriter;
027import java.io.PrintWriter;
028import java.nio.charset.Charset;
029import java.util.ArrayList;
030import java.util.HashSet;
031import java.util.List;
032import java.util.Set;
033import java.util.TimeZone;
034import java.util.jar.JarEntry;
035import java.util.jar.JarOutputStream;
036import java.util.zip.CRC32;
037import java.util.zip.GZIPInputStream;
038import java.util.zip.ZipEntry;
039
040import org.apache.commons.compress.harmony.pack200.Codec;
041import org.apache.commons.compress.harmony.pack200.Pack200Exception;
042import org.apache.commons.compress.harmony.unpack200.bytecode.Attribute;
043import org.apache.commons.compress.harmony.unpack200.bytecode.CPClass;
044import org.apache.commons.compress.harmony.unpack200.bytecode.CPField;
045import org.apache.commons.compress.harmony.unpack200.bytecode.CPMethod;
046import org.apache.commons.compress.harmony.unpack200.bytecode.CPUTF8;
047import org.apache.commons.compress.harmony.unpack200.bytecode.ClassConstantPool;
048import org.apache.commons.compress.harmony.unpack200.bytecode.ClassFile;
049import org.apache.commons.compress.harmony.unpack200.bytecode.ClassFileEntry;
050import org.apache.commons.compress.harmony.unpack200.bytecode.InnerClassesAttribute;
051import org.apache.commons.compress.harmony.unpack200.bytecode.SourceFileAttribute;
052import org.apache.commons.io.input.BoundedInputStream;
053
054/**
055 * A Pack200 archive consists of one or more segments. Each segment is stand-alone, in the sense that every segment has the magic number header; thus, every
056 * segment is also a valid archive. However, it is possible to combine (non-GZipped) archives into a single large archive by concatenation alone. Thus, all the
057 * hard work in unpacking an archive falls to understanding a segment.
058 *
059 * The first component of a segment is the header; this contains (amongst other things) the expected counts of constant pool entries, which in turn defines how
060 * many values need to be read from the stream. Because values are variable width (see {@link Codec}), it is not possible to calculate the start of the next
061 * segment, although one of the header values does hint at the size of the segment if non-zero, which can be used for buffering purposes.
062 *
063 * Note that this does not perform any buffering of the input stream; each value will be read on a byte-by-byte basis. It does not perform GZip decompression
064 * automatically; both of these are expected to be done by the caller if the stream has the magic header for GZip streams ({@link GZIPInputStream#GZIP_MAGIC}).
065 * In any case, if GZip decompression is being performed the input stream will be buffered at a higher level, and thus this can read on a byte-oriented basis.
066 */
067public class Segment {
068
069    public static final int LOG_LEVEL_VERBOSE = 2;
070
071    public static final int LOG_LEVEL_STANDARD = 1;
072
073    public static final int LOG_LEVEL_QUIET = 0;
074
075    private SegmentHeader header;
076
077    private CpBands cpBands;
078
079    private AttrDefinitionBands attrDefinitionBands;
080
081    private IcBands icBands;
082
083    private ClassBands classBands;
084
085    private BcBands bcBands;
086
087    private FileBands fileBands;
088
089    private boolean overrideDeflateHint;
090
091    private boolean deflateHint;
092
093    private boolean doPreRead;
094
095    private int logLevel;
096
097    private PrintWriter logStream;
098
099    private byte[][] classFilesContents;
100
101    private boolean[] fileDeflate;
102
103    private boolean[] fileIsClass;
104
105    private InputStream internalBuffer;
106
107    private ClassFile buildClassFile(final int classNum) {
108        final ClassFile classFile = new ClassFile();
109        final int[] major = classBands.getClassVersionMajor();
110        final int[] minor = classBands.getClassVersionMinor();
111        if (major != null) {
112            classFile.major = major[classNum];
113            classFile.minor = minor[classNum];
114        } else {
115            classFile.major = header.getDefaultClassMajorVersion();
116            classFile.minor = header.getDefaultClassMinorVersion();
117        }
118        // build constant pool
119        final ClassConstantPool cp = classFile.pool;
120        final int fullNameIndexInCpClass = classBands.getClassThisInts()[classNum];
121        final String fullName = cpBands.getCpClass()[fullNameIndexInCpClass];
122        // SourceFile attribute
123        int i = fullName.lastIndexOf("/") + 1; // if lastIndexOf==-1, then
124        // -1+1=0, so str.substring(0)
125        // == str
126
127        // Get the source file attribute
128        final List<Attribute> classAttributes = classBands.getClassAttributes()[classNum];
129        SourceFileAttribute sourceFileAttribute = null;
130        for (final Attribute classAttribute : classAttributes) {
131            if (classAttribute.isSourceFileAttribute()) {
132                sourceFileAttribute = (SourceFileAttribute) classAttribute;
133            }
134        }
135
136        if (sourceFileAttribute == null) {
137            // If we don't have a source file attribute yet, we need
138            // to infer it from the class.
139            final AttributeLayout SOURCE_FILE = attrDefinitionBands.getAttributeDefinitionMap().getAttributeLayout(AttributeLayout.ATTRIBUTE_SOURCE_FILE,
140                    AttributeLayout.CONTEXT_CLASS);
141            if (SOURCE_FILE.matches(classBands.getRawClassFlags()[classNum])) {
142                int firstDollar = -1;
143                for (int index = 0; index < fullName.length(); index++) {
144                    if (fullName.charAt(index) <= '$') {
145                        firstDollar = index;
146                    }
147                }
148                String fileName;
149
150                if (firstDollar > -1 && i <= firstDollar) {
151                    fileName = fullName.substring(i, firstDollar) + ".java";
152                } else {
153                    fileName = fullName.substring(i) + ".java";
154                }
155                sourceFileAttribute = new SourceFileAttribute(cpBands.cpUTF8Value(fileName, false));
156                classFile.attributes = new Attribute[] { (Attribute) cp.add(sourceFileAttribute) };
157            } else {
158                classFile.attributes = new Attribute[] {};
159            }
160        } else {
161            classFile.attributes = new Attribute[] { (Attribute) cp.add(sourceFileAttribute) };
162        }
163
164        // If we see any class attributes, add them to the class's attributes
165        // that will
166        // be written out. Keep SourceFileAttributes out since we just
167        // did them above.
168        final List<Attribute> classAttributesWithoutSourceFileAttribute = new ArrayList<>(classAttributes.size());
169        for (int index = 0; index < classAttributes.size(); index++) {
170            final Attribute attrib = classAttributes.get(index);
171            if (!attrib.isSourceFileAttribute()) {
172                classAttributesWithoutSourceFileAttribute.add(attrib);
173            }
174        }
175        final Attribute[] originalAttributes = classFile.attributes;
176        classFile.attributes = new Attribute[originalAttributes.length + classAttributesWithoutSourceFileAttribute.size()];
177        System.arraycopy(originalAttributes, 0, classFile.attributes, 0, originalAttributes.length);
178        for (int index = 0; index < classAttributesWithoutSourceFileAttribute.size(); index++) {
179            final Attribute attrib = classAttributesWithoutSourceFileAttribute.get(index);
180            cp.add(attrib);
181            classFile.attributes[originalAttributes.length + index] = attrib;
182        }
183
184        // this/superclass
185        final ClassFileEntry cfThis = cp.add(cpBands.cpClassValue(fullNameIndexInCpClass));
186        final ClassFileEntry cfSuper = cp.add(cpBands.cpClassValue(classBands.getClassSuperInts()[classNum]));
187        // add interfaces
188        final ClassFileEntry[] cfInterfaces = new ClassFileEntry[classBands.getClassInterfacesInts()[classNum].length];
189        for (i = 0; i < cfInterfaces.length; i++) {
190            cfInterfaces[i] = cp.add(cpBands.cpClassValue(classBands.getClassInterfacesInts()[classNum][i]));
191        }
192        // add fields
193        final ClassFileEntry[] cfFields = new ClassFileEntry[classBands.getClassFieldCount()[classNum]];
194        // fieldDescr and fieldFlags used to create this
195        for (i = 0; i < cfFields.length; i++) {
196            final int descriptorIndex = classBands.getFieldDescrInts()[classNum][i];
197            final int nameIndex = cpBands.getCpDescriptorNameInts()[descriptorIndex];
198            final int typeIndex = cpBands.getCpDescriptorTypeInts()[descriptorIndex];
199            final CPUTF8 name = cpBands.cpUTF8Value(nameIndex);
200            final CPUTF8 descriptor = cpBands.cpSignatureValue(typeIndex);
201            cfFields[i] = cp.add(new CPField(name, descriptor, classBands.getFieldFlags()[classNum][i], classBands.getFieldAttributes()[classNum][i]));
202        }
203        // add methods
204        final ClassFileEntry[] cfMethods = new ClassFileEntry[classBands.getClassMethodCount()[classNum]];
205        // methodDescr and methodFlags used to create this
206        for (i = 0; i < cfMethods.length; i++) {
207            final int descriptorIndex = classBands.getMethodDescrInts()[classNum][i];
208            final int nameIndex = cpBands.getCpDescriptorNameInts()[descriptorIndex];
209            final int typeIndex = cpBands.getCpDescriptorTypeInts()[descriptorIndex];
210            final CPUTF8 name = cpBands.cpUTF8Value(nameIndex);
211            final CPUTF8 descriptor = cpBands.cpSignatureValue(typeIndex);
212            cfMethods[i] = cp.add(new CPMethod(name, descriptor, classBands.getMethodFlags()[classNum][i], classBands.getMethodAttributes()[classNum][i]));
213        }
214        cp.addNestedEntries();
215
216        // add inner class attribute (if required)
217        boolean addInnerClassesAttr = false;
218        final IcTuple[] icLocal = getClassBands().getIcLocal()[classNum];
219        final boolean icLocalSent = icLocal != null;
220        final InnerClassesAttribute innerClassesAttribute = new InnerClassesAttribute("InnerClasses");
221        final IcTuple[] icRelevant = getIcBands().getRelevantIcTuples(fullName, cp);
222        final List<IcTuple> ic_stored = computeIcStored(icLocal, icRelevant);
223        for (final IcTuple icStored : ic_stored) {
224            final int innerClassIndex = icStored.thisClassIndex();
225            final int outerClassIndex = icStored.outerClassIndex();
226            final int simpleClassNameIndex = icStored.simpleClassNameIndex();
227
228            final String innerClassString = icStored.thisClassString();
229            final String outerClassString = icStored.outerClassString();
230            final String simpleClassName = icStored.simpleClassName();
231
232            CPUTF8 innerName = null;
233            CPClass outerClass = null;
234
235            final CPClass innerClass = innerClassIndex != -1 ? cpBands.cpClassValue(innerClassIndex) : cpBands.cpClassValue(innerClassString);
236            if (!icStored.isAnonymous()) {
237                innerName = simpleClassNameIndex != -1 ? cpBands.cpUTF8Value(simpleClassNameIndex) : cpBands.cpUTF8Value(simpleClassName);
238            }
239
240            if (icStored.isMember()) {
241                outerClass = outerClassIndex != -1 ? cpBands.cpClassValue(outerClassIndex) : cpBands.cpClassValue(outerClassString);
242            }
243            final int flags = icStored.F;
244            innerClassesAttribute.addInnerClassesEntry(innerClass, outerClass, innerName, flags);
245            addInnerClassesAttr = true;
246        }
247        // If ic_local is sent, and it's empty, don't add
248        // the inner classes attribute.
249        if (icLocalSent && icLocal.length == 0) {
250            addInnerClassesAttr = false;
251        }
252
253        // If ic_local is not sent and ic_relevant is empty,
254        // don't add the inner class attribute.
255        if (!icLocalSent && icRelevant.length == 0) {
256            addInnerClassesAttr = false;
257        }
258
259        if (addInnerClassesAttr) {
260            // Need to add the InnerClasses attribute to the
261            // existing classFile attributes.
262            final Attribute[] originalAttrs = classFile.attributes;
263            final Attribute[] newAttrs = new Attribute[originalAttrs.length + 1];
264            System.arraycopy(originalAttrs, 0, newAttrs, 0, originalAttrs.length);
265            newAttrs[newAttrs.length - 1] = innerClassesAttribute;
266            classFile.attributes = newAttrs;
267            cp.addWithNestedEntries(innerClassesAttribute);
268        }
269        // sort CP according to cp_All
270        cp.resolve(this);
271        // NOTE the indexOf is only valid after the cp.resolve()
272        // build up remainder of file
273        classFile.accessFlags = (int) classBands.getClassFlags()[classNum];
274        classFile.thisClass = cp.indexOf(cfThis);
275        classFile.superClass = cp.indexOf(cfSuper);
276        // TODO placate format of file for writing purposes
277        classFile.interfaces = new int[cfInterfaces.length];
278        for (i = 0; i < cfInterfaces.length; i++) {
279            classFile.interfaces[i] = cp.indexOf(cfInterfaces[i]);
280        }
281        classFile.fields = cfFields;
282        classFile.methods = cfMethods;
283        return classFile;
284    }
285
286    /**
287     * Given an ic_local and an ic_relevant, use them to calculate what should be added as ic_stored.
288     *
289     * @param icLocal    IcTuple[] array of local transmitted tuples
290     * @param icRelevant IcTuple[] array of relevant tuples
291     * @return List of tuples to be stored. If ic_local is null or empty, the values returned may not be correct. The caller will have to determine if this is
292     *         the case.
293     */
294    private List<IcTuple> computeIcStored(final IcTuple[] icLocal, final IcTuple[] icRelevant) {
295        final List<IcTuple> result = new ArrayList<>(icRelevant.length);
296        final List<IcTuple> duplicates = new ArrayList<>(icRelevant.length);
297        final Set<IcTuple> isInResult = new HashSet<>(icRelevant.length);
298
299        // need to compute:
300        // result = ic_local XOR ic_relevant
301
302        // add ic_local
303        if (icLocal != null) {
304            for (final IcTuple element : icLocal) {
305                if (isInResult.add(element)) {
306                    result.add(element);
307                }
308            }
309        }
310
311        // add ic_relevant
312        for (final IcTuple element : icRelevant) {
313            if (isInResult.add(element)) {
314                result.add(element);
315            } else {
316                duplicates.add(element);
317            }
318        }
319
320        // eliminate "duplicates"
321        duplicates.forEach(result::remove);
322
323        return result;
324    }
325
326    protected AttrDefinitionBands getAttrDefinitionBands() {
327        return attrDefinitionBands;
328    }
329
330    protected ClassBands getClassBands() {
331        return classBands;
332    }
333
334    public SegmentConstantPool getConstantPool() {
335        return cpBands.getConstantPool();
336    }
337
338    protected CpBands getCpBands() {
339        return cpBands;
340    }
341
342    protected IcBands getIcBands() {
343        return icBands;
344    }
345
346    public SegmentHeader getSegmentHeader() {
347        return header;
348    }
349
350    public void log(final int logLevel, final String message) {
351        if (this.logLevel >= logLevel) {
352            logStream.println(message);
353        }
354    }
355
356    /**
357     * Override the archive's deflate hint with the given boolean
358     *
359     * @param deflateHint - the deflate hint to use
360     */
361    public void overrideDeflateHint(final boolean deflateHint) {
362        this.overrideDeflateHint = true;
363        this.deflateHint = deflateHint;
364    }
365
366    /**
367     * This performs the actual work of parsing against a non-static instance of Segment. This method is intended to run concurrently for multiple segments.
368     *
369     * @throws IOException      if a problem occurs during reading from the underlying stream
370     * @throws Pack200Exception if a problem occurs with an unexpected value or unsupported codec
371     */
372    private void parseSegment() throws IOException, Pack200Exception {
373
374        header.unpack();
375        cpBands.unpack();
376        attrDefinitionBands.unpack();
377        icBands.unpack();
378        classBands.unpack();
379        bcBands.unpack();
380        fileBands.unpack();
381
382        int classNum = 0;
383        final int numberOfFiles = header.getNumberOfFiles();
384        final String[] fileName = fileBands.getFileName();
385        final int[] fileOptions = fileBands.getFileOptions();
386        final SegmentOptions options = header.getOptions();
387
388        classFilesContents = new byte[numberOfFiles][];
389        fileDeflate = new boolean[numberOfFiles];
390        fileIsClass = new boolean[numberOfFiles];
391
392        final ByteArrayOutputStream bos = new ByteArrayOutputStream();
393        final DataOutputStream dos = new DataOutputStream(bos);
394
395        for (int i = 0; i < numberOfFiles; i++) {
396            String name = fileName[i];
397
398            final boolean nameIsEmpty = name == null || name.isEmpty();
399            final boolean isClass = (fileOptions[i] & 2) == 2 || nameIsEmpty;
400            if (isClass && nameIsEmpty) {
401                name = cpBands.getCpClass()[classBands.getClassThisInts()[classNum]] + ".class";
402                fileName[i] = name;
403            }
404
405            if (!overrideDeflateHint) {
406                fileDeflate[i] = (fileOptions[i] & 1) == 1 || options.shouldDeflate();
407            } else {
408                fileDeflate[i] = deflateHint;
409            }
410
411            fileIsClass[i] = isClass;
412
413            if (isClass) {
414                final ClassFile classFile = buildClassFile(classNum);
415                classFile.write(dos);
416                dos.flush();
417
418                classFilesContents[classNum] = bos.toByteArray();
419                bos.reset();
420
421                classNum++;
422            }
423        }
424    }
425
426    /**
427     * This performs reading the data from the stream into non-static instance of Segment. After the completion of this method stream can be freed.
428     *
429     * @param in the input stream to read from
430     * @throws IOException      if a problem occurs during reading from the underlying stream
431     * @throws Pack200Exception if a problem occurs with an unexpected value or unsupported codec
432     */
433    private void readSegment(final InputStream in) throws IOException, Pack200Exception {
434        log(LOG_LEVEL_VERBOSE, "-------");
435        cpBands = new CpBands(this);
436        cpBands.read(in);
437        attrDefinitionBands = new AttrDefinitionBands(this);
438        attrDefinitionBands.read(in);
439        icBands = new IcBands(this);
440        icBands.read(in);
441        classBands = new ClassBands(this);
442        classBands.read(in);
443        bcBands = new BcBands(this);
444        bcBands.read(in);
445        fileBands = new FileBands(this);
446        fileBands.read(in);
447
448        fileBands.processFileBits();
449    }
450
451    public void setLogLevel(final int logLevel) {
452        this.logLevel = logLevel;
453    }
454
455    public void setLogStream(final OutputStream logStream) {
456        this.logStream = new PrintWriter(new OutputStreamWriter(logStream, Charset.defaultCharset()), false);
457    }
458
459    public void setPreRead(final boolean value) {
460        doPreRead = value;
461    }
462
463    /**
464     * Unpacks a packed stream (either .pack. or .pack.gz) into a corresponding JarOuputStream.
465     *
466     * @param inputStream  a packed input stream, preferably a {@link BoundedInputStream}.
467     * @param out output stream.
468     * @throws Pack200Exception if there is a problem unpacking
469     * @throws IOException      if there is a problem with I/O during unpacking
470     */
471    public void unpack(final InputStream inputStream, final JarOutputStream out) throws IOException, Pack200Exception {
472        unpackRead(inputStream);
473        unpackProcess();
474        unpackWrite(out);
475    }
476
477    void unpackProcess() throws IOException, Pack200Exception {
478        if (internalBuffer != null) {
479            readSegment(internalBuffer);
480        }
481        parseSegment();
482    }
483
484    /*
485     * Package-private accessors for unpacking stages
486     */
487    void unpackRead(final InputStream inputStream) throws IOException, Pack200Exception {
488        @SuppressWarnings("resource")
489        final InputStream in = Pack200UnpackerAdapter.newBoundedInputStream(inputStream);
490
491        header = new SegmentHeader(this);
492        header.read(in);
493
494        final int size = (int) header.getArchiveSize() - header.getArchiveSizeOffset();
495
496        if (doPreRead && header.getArchiveSize() != 0) {
497            final byte[] data = new byte[size];
498            in.read(data);
499            internalBuffer = new BufferedInputStream(new ByteArrayInputStream(data));
500        } else {
501            readSegment(in);
502        }
503    }
504
505    void unpackWrite(final JarOutputStream out) throws IOException {
506        writeJar(out);
507        if (logStream != null) {
508            logStream.close();
509        }
510    }
511
512    /**
513     * Writes the segment to an output stream. The output stream should be pre-buffered for efficiency. Also takes the same input stream for reading, since the
514     * file bits may not be loaded and thus just copied from one stream to another. Doesn't close the output stream when finished, in case there are more
515     * entries (e.g. further segments) to be written.
516     *
517     * @param out the JarOutputStream to write data to
518     * @throws IOException if an error occurs while reading or writing to the streams
519     */
520    public void writeJar(final JarOutputStream out) throws IOException {
521        final String[] fileName = fileBands.getFileName();
522        final int[] fileModtime = fileBands.getFileModtime();
523        final long[] fileSize = fileBands.getFileSize();
524        final byte[][] fileBits = fileBands.getFileBits();
525
526        // now write the files out
527        int classNum = 0;
528        final int numberOfFiles = header.getNumberOfFiles();
529        final long archiveModtime = header.getArchiveModtime();
530
531        for (int i = 0; i < numberOfFiles; i++) {
532            final String name = fileName[i];
533            // For Pack200 archives, modtime is in seconds
534            // from the epoch. JarEntries need it to be in
535            // milliseconds from the epoch.
536            // Even though we're adding two longs and multiplying
537            // by 1000, we won't overflow because both longs are
538            // always under 2^32.
539            final long modtime = 1000 * (archiveModtime + fileModtime[i]);
540            final boolean deflate = fileDeflate[i];
541
542            final JarEntry entry = new JarEntry(name);
543            if (deflate) {
544                entry.setMethod(ZipEntry.DEFLATED);
545            } else {
546                entry.setMethod(ZipEntry.STORED);
547                final CRC32 crc = new CRC32();
548                if (fileIsClass[i]) {
549                    crc.update(classFilesContents[classNum]);
550                    entry.setSize(classFilesContents[classNum].length);
551                } else {
552                    crc.update(fileBits[i]);
553                    entry.setSize(fileSize[i]);
554                }
555                entry.setCrc(crc.getValue());
556            }
557            // On Windows at least, need to correct for timezone
558            entry.setTime(modtime - TimeZone.getDefault().getRawOffset());
559            out.putNextEntry(entry);
560
561            // write to output stream
562            if (fileIsClass[i]) {
563                entry.setSize(classFilesContents[classNum].length);
564                out.write(classFilesContents[classNum]);
565                classNum++;
566            } else {
567                entry.setSize(fileSize[i]);
568                out.write(fileBits[i]);
569            }
570        }
571    }
572
573}