BandSet.java

  1. /*
  2.  *  Licensed to the Apache Software Foundation (ASF) under one or more
  3.  *  contributor license agreements.  See the NOTICE file distributed with
  4.  *  this work for additional information regarding copyright ownership.
  5.  *  The ASF licenses this file to You under the Apache License, Version 2.0
  6.  *  (the "License"); you may not use this file except in compliance with
  7.  *  the License.  You may obtain a copy of the License at
  8.  *
  9.  *     http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  *  Unless required by applicable law or agreed to in writing, software
  12.  *  distributed under the License is distributed on an "AS IS" BASIS,
  13.  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  *  See the License for the specific language governing permissions and
  15.  *  limitations under the License.
  16.  */
  17. package org.apache.commons.compress.harmony.pack200;

  18. import java.io.IOException;
  19. import java.io.OutputStream;
  20. import java.util.ArrayList;
  21. import java.util.Arrays;
  22. import java.util.HashMap;
  23. import java.util.List;
  24. import java.util.Map;
  25. import java.util.stream.IntStream;

  26. /**
  27.  * Abstract superclass for a set of bands
  28.  */
  29. public abstract class BandSet {

  30.     /**
  31.      * Results obtained by trying different Codecs to encode a band
  32.      */
  33.     public class BandAnalysisResults {

  34.         // The number of Codecs tried so far
  35.         private int numCodecsTried;

  36.         // The number of bytes saved by using betterCodec instead of the default codec
  37.         private int saved;

  38.         // Extra metadata to pass to the segment header (to be appended to the
  39.         // band_headers band)
  40.         private int[] extraMetadata;

  41.         // The results of encoding the band with betterCodec
  42.         private byte[] encodedBand;

  43.         // The best Codec found so far, or should be null if the default is the
  44.         // best so far
  45.         private Codec betterCodec;

  46.     }

  47.     /**
  48.      * BandData represents information about a band, e.g. largest value etc and is used in the heuristics that calculate whether an alternative Codec could make
  49.      * the encoded band smaller.
  50.      */
  51.     public class BandData {

  52.         private final int[] band;
  53.         private int smallest = Integer.MAX_VALUE;
  54.         private int largest = Integer.MIN_VALUE;
  55.         private int smallestDelta;
  56.         private int largestDelta;

  57.         private int deltaIsAscending;
  58.         private int smallDeltaCount;

  59.         private double averageAbsoluteDelta;
  60.         private double averageAbsoluteValue;

  61.         private Map<Integer, Integer> distinctValues;

  62.         /**
  63.          * Constructs a new instance of BandData. The band is then analysed.
  64.          *
  65.          * @param band the band of integers
  66.          */
  67.         public BandData(final int[] band) {
  68.             this.band = band;
  69.             final Integer one = Integer.valueOf(1);
  70.             for (int i = 0; i < band.length; i++) {
  71.                 if (band[i] < smallest) {
  72.                     smallest = band[i];
  73.                 }
  74.                 if (band[i] > largest) {
  75.                     largest = band[i];
  76.                 }
  77.                 if (i != 0) {
  78.                     final int delta = band[i] - band[i - 1];
  79.                     if (delta < smallestDelta) {
  80.                         smallestDelta = delta;
  81.                     }
  82.                     if (delta > largestDelta) {
  83.                         largestDelta = delta;
  84.                     }
  85.                     if (delta >= 0) {
  86.                         deltaIsAscending++;
  87.                     }
  88.                     averageAbsoluteDelta += (double) Math.abs(delta) / (double) (band.length - 1);
  89.                     if (Math.abs(delta) < 256) {
  90.                         smallDeltaCount++;
  91.                     }
  92.                 } else {
  93.                     smallestDelta = band[0];
  94.                     largestDelta = band[0];
  95.                 }
  96.                 averageAbsoluteValue += (double) Math.abs(band[i]) / (double) band.length;
  97.                 if (effort > 3) { // do calculations needed to consider population codec
  98.                     if (distinctValues == null) {
  99.                         distinctValues = new HashMap<>();
  100.                     }
  101.                     final Integer value = Integer.valueOf(band[i]);
  102.                     Integer count = distinctValues.get(value);
  103.                     if (count == null) {
  104.                         count = one;
  105.                     } else {
  106.                         count = Integer.valueOf(count.intValue() + 1);
  107.                     }
  108.                     distinctValues.put(value, count);
  109.                 }
  110.             }
  111.         }

  112.         /**
  113.          * Returns true if any band elements are negative.
  114.          *
  115.          * @return true if any band elements are negative.
  116.          */
  117.         public boolean anyNegatives() {
  118.             return smallest < 0;
  119.         }

  120.         /**
  121.          * Returns true if the band deltas are mainly positive (heuristic).
  122.          *
  123.          * @return true if the band deltas are mainly positive (heuristic).
  124.          */
  125.         public boolean mainlyPositiveDeltas() {
  126.             // Note: the value below has been tuned - please test carefully if changing it
  127.             return (float) deltaIsAscending / (float) band.length > 0.95F;
  128.         }

  129.         /**
  130.          * Returns true if the deltas between adjacent band elements are mainly small (heuristic).
  131.          *
  132.          * @return true if the deltas between adjacent band elements are mainly small (heuristic).
  133.          */
  134.         public boolean mainlySmallDeltas() {
  135.             // Note: the value below has been tuned - please test carefully if changing it
  136.             return (float) smallDeltaCount / (float) band.length > 0.7F;
  137.         }

  138.         /**
  139.          * Returns the total number of distinct values found in the band.
  140.          *
  141.          * @return the total number of distinct values found in the band.
  142.          */
  143.         public int numDistinctValues() {
  144.             if (distinctValues == null) {
  145.                 return band.length;
  146.             }
  147.             return distinctValues.size();
  148.         }

  149.         /**
  150.          * Returns true if the band is well correlated (i.e. would be suitable for a delta encoding) (heuristic).
  151.          *
  152.          * @return true if the band is well correlated (i.e. would be suitable for a delta encoding) (heuristic).
  153.          */
  154.         public boolean wellCorrelated() {
  155.             // Note: the value below has been tuned - please test carefully if changing it
  156.             return averageAbsoluteDelta * 3.1 < averageAbsoluteValue;
  157.         }

  158.     }

  159.     private static final byte[] EMPTY_BYTE_ARRAY = {};

  160.     // Minimum size of band for each effort level where we consider alternative codecs
  161.     // Note: these values have been tuned - please test carefully if changing them
  162.     private static final int[] effortThresholds = { 0, 0, 1000, 500, 100, 100, 100, 100, 100, 0 };

  163.     protected final SegmentHeader segmentHeader;
  164.     final int effort;

  165.     private long[] canonicalLargest;

  166.     private long[] canonicalSmallest;

  167.     /**
  168.      * Constructs a new BandSet.
  169.      *
  170.      * @param effort the packing effort to be used (must be 1-9)
  171.      * @param header the segment header
  172.      */
  173.     public BandSet(final int effort, final SegmentHeader header) {
  174.         this.effort = effort;
  175.         this.segmentHeader = header;
  176.     }

  177.     private BandAnalysisResults analyseBand(final String name, final int[] band, final BHSDCodec defaultCodec) throws Pack200Exception {

  178.         final BandAnalysisResults results = new BandAnalysisResults();

  179.         if (canonicalLargest == null) {
  180.             canonicalLargest = new long[116];
  181.             canonicalSmallest = new long[116];
  182.             for (int i = 1; i < canonicalLargest.length; i++) {
  183.                 canonicalLargest[i] = CodecEncoding.getCanonicalCodec(i).largest();
  184.                 canonicalSmallest[i] = CodecEncoding.getCanonicalCodec(i).smallest();
  185.             }
  186.         }
  187.         final BandData bandData = new BandData(band);

  188.         // Check that there is a reasonable saving to be made
  189.         final byte[] encoded = defaultCodec.encode(band);
  190.         results.encodedBand = encoded;

  191.         // Note: these values have been tuned - please test carefully if changing them
  192.         if (encoded.length <= band.length + 23 - 2 * effort) { // TODO: tweak
  193.             return results;
  194.         }

  195.         // Check if we can use BYTE1 as that's a 1:1 mapping if we can
  196.         if (!bandData.anyNegatives() && bandData.largest <= Codec.BYTE1.largest()) {
  197.             results.encodedBand = Codec.BYTE1.encode(band);
  198.             results.betterCodec = Codec.BYTE1;
  199.             return results;
  200.         }

  201.         // Consider a population codec (but can't be nested)
  202.         if (effort > 3 && !name.equals("POPULATION")) {
  203.             final int numDistinctValues = bandData.numDistinctValues();
  204.             final float distinctValuesAsProportion = (float) numDistinctValues / (float) band.length;

  205.             // Note: these values have been tuned - please test carefully if changing them
  206.             if (numDistinctValues < 100 || distinctValuesAsProportion < 0.02 || effort > 6 && distinctValuesAsProportion < 0.04) { // TODO: tweak
  207.                 encodeWithPopulationCodec(band, defaultCodec, bandData, results);
  208.                 if (timeToStop(results)) {
  209.                     return results;
  210.                 }
  211.             }
  212.         }

  213.         final List<BHSDCodec[]> codecFamiliesToTry = new ArrayList<>();

  214.         // See if the deltas are mainly small increments
  215.         if (bandData.mainlyPositiveDeltas() && bandData.mainlySmallDeltas()) {
  216.             codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs2);
  217.         }

  218.         if (bandData.wellCorrelated()) { // Try delta encodings
  219.             if (bandData.mainlyPositiveDeltas()) {
  220.                 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs1);
  221.                 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs3);
  222.                 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs4);
  223.                 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs5);
  224.                 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs1);
  225.                 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs3);
  226.                 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs4);
  227.                 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs5);
  228.                 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs2);
  229.             } else {
  230.                 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs1);
  231.                 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs3);
  232.                 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs2);
  233.                 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs4);
  234.                 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs5);
  235.                 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs1);
  236.                 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs2);
  237.             }
  238.         } else if (bandData.anyNegatives()) {
  239.             codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs1);
  240.             codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs2);
  241.             codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs1);
  242.             codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs2);
  243.             codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs3);
  244.             codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs4);
  245.             codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs5);
  246.         } else {
  247.             codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs1);
  248.             codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs3);
  249.             codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs4);
  250.             codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs5);
  251.             codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs2);
  252.             codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs1);
  253.             codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs3);
  254.             codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs4);
  255.             codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs5);
  256.         }
  257.         for (final BHSDCodec[] family : codecFamiliesToTry) {
  258.             tryCodecs(band, defaultCodec, bandData, results, encoded, family);
  259.             if (timeToStop(results)) {
  260.                 break;
  261.             }
  262.         }

  263.         return results;
  264.     }

  265.     /**
  266.      * Converts a list of ConstantPoolEntrys to an int[] array of their indices
  267.      *
  268.      * @param list conversion source.
  269.      * @return conversion result.
  270.      */
  271.     protected int[] cpEntryListToArray(final List<? extends ConstantPoolEntry> list) {
  272.         final int[] array = new int[list.size()];
  273.         for (int i = 0; i < array.length; i++) {
  274.             array[i] = list.get(i).getIndex();
  275.             if (array[i] < 0) {
  276.                 throw new IllegalArgumentException("Index should be > 0");
  277.             }
  278.         }
  279.         return array;
  280.     }

  281.     /**
  282.      * Converts a list of ConstantPoolEntrys or nulls to an int[] array of their indices +1 (or 0 for nulls)
  283.      *
  284.      * @param list conversion source.
  285.      * @return conversion result.
  286.      */
  287.     protected int[] cpEntryOrNullListToArray(final List<? extends ConstantPoolEntry> list) {
  288.         final int[] array = new int[list.size()];
  289.         for (int j = 0; j < array.length; j++) {
  290.             final ConstantPoolEntry cpEntry = list.get(j);
  291.             array[j] = cpEntry == null ? 0 : cpEntry.getIndex() + 1;
  292.             if (cpEntry != null && cpEntry.getIndex() < 0) {
  293.                 throw new IllegalArgumentException("Index should be > 0");
  294.             }
  295.         }
  296.         return array;
  297.     }

  298.     /**
  299.      * Encode a band of integers. The default codec may be used, but other Codecs are considered if effort is greater than 1.
  300.      *
  301.      * @param name         name of the band (used for debugging)
  302.      * @param ints         the band
  303.      * @param defaultCodec the default Codec
  304.      * @return the encoded band
  305.      * @throws Pack200Exception TODO
  306.      */
  307.     public byte[] encodeBandInt(final String name, final int[] ints, final BHSDCodec defaultCodec) throws Pack200Exception {
  308.         byte[] encodedBand = null;
  309.         // Useful for debugging
  310. //        if (ints.length > 0) {
  311. //            System.out.println("encoding " + name + " " + ints.length);
  312. //        }
  313.         if (effort > 1 && ints.length >= effortThresholds[effort]) {
  314.             final BandAnalysisResults results = analyseBand(name, ints, defaultCodec);
  315.             final Codec betterCodec = results.betterCodec;
  316.             encodedBand = results.encodedBand;
  317.             if (betterCodec != null) {
  318.                 if (betterCodec instanceof BHSDCodec) {
  319.                     final int[] specifierBand = CodecEncoding.getSpecifier(betterCodec, defaultCodec);
  320.                     int specifier = specifierBand[0];
  321.                     if (specifierBand.length > 1) {
  322.                         for (int i = 1; i < specifierBand.length; i++) {
  323.                             segmentHeader.appendBandCodingSpecifier(specifierBand[i]);
  324.                         }
  325.                     }
  326.                     if (defaultCodec.isSigned()) {
  327.                         specifier = -1 - specifier;
  328.                     } else {
  329.                         specifier += defaultCodec.getL();
  330.                     }
  331.                     final byte[] specifierEncoded = defaultCodec.encode(new int[] { specifier });
  332.                     final byte[] band = new byte[specifierEncoded.length + encodedBand.length];
  333.                     System.arraycopy(specifierEncoded, 0, band, 0, specifierEncoded.length);
  334.                     System.arraycopy(encodedBand, 0, band, specifierEncoded.length, encodedBand.length);
  335.                     return band;
  336.                 }
  337.                 if (betterCodec instanceof PopulationCodec) {
  338.                     IntStream.of(results.extraMetadata).forEach(segmentHeader::appendBandCodingSpecifier);
  339.                     return encodedBand;
  340.                 }
  341.                 if (betterCodec instanceof RunCodec) {
  342.                     // TODO?
  343.                 }
  344.             }
  345.         }

  346.         // If we get here then we've decided to use the default codec.
  347.         if (ints.length > 0) {
  348.             if (encodedBand == null) {
  349.                 encodedBand = defaultCodec.encode(ints);
  350.             }
  351.             final int first = ints[0];
  352.             if (defaultCodec.getB() != 1) {
  353.                 if (defaultCodec.isSigned() && first >= -256 && first <= -1) {
  354.                     final int specifier = -1 - CodecEncoding.getSpecifierForDefaultCodec(defaultCodec);
  355.                     final byte[] specifierEncoded = defaultCodec.encode(new int[] { specifier });
  356.                     final byte[] band = new byte[specifierEncoded.length + encodedBand.length];
  357.                     System.arraycopy(specifierEncoded, 0, band, 0, specifierEncoded.length);
  358.                     System.arraycopy(encodedBand, 0, band, specifierEncoded.length, encodedBand.length);
  359.                     return band;
  360.                 }
  361.                 if (!defaultCodec.isSigned() && first >= defaultCodec.getL() && first <= defaultCodec.getL() + 255) {
  362.                     final int specifier = CodecEncoding.getSpecifierForDefaultCodec(defaultCodec) + defaultCodec.getL();
  363.                     final byte[] specifierEncoded = defaultCodec.encode(new int[] { specifier });
  364.                     final byte[] band = new byte[specifierEncoded.length + encodedBand.length];
  365.                     System.arraycopy(specifierEncoded, 0, band, 0, specifierEncoded.length);
  366.                     System.arraycopy(encodedBand, 0, band, specifierEncoded.length, encodedBand.length);
  367.                     return band;
  368.                 }
  369.             }
  370.             return encodedBand;
  371.         }
  372.         return EMPTY_BYTE_ARRAY;
  373.     }

  374.     /**
  375.      * Encode a band of longs (values are split into their high and low 32 bits and then encoded as two separate bands
  376.      *
  377.      * @param name        name of the band (for debugging purposes)
  378.      * @param flags       the band
  379.      * @param loCodec     Codec for the low 32-bits band
  380.      * @param hiCodec     Codec for the high 32-bits band
  381.      * @param haveHiFlags ignores the high band if true as all values would be zero
  382.      * @return the encoded band
  383.      * @throws Pack200Exception TODO
  384.      */
  385.     protected byte[] encodeFlags(final String name, final long[] flags, final BHSDCodec loCodec, final BHSDCodec hiCodec, final boolean haveHiFlags)
  386.             throws Pack200Exception {
  387.         if (!haveHiFlags) {
  388.             final int[] loBits = new int[flags.length];
  389.             Arrays.setAll(loBits, i -> (int) flags[i]);
  390.             return encodeBandInt(name, loBits, loCodec);
  391.         }
  392.         final int[] hiBits = new int[flags.length];
  393.         final int[] loBits = new int[flags.length];
  394.         for (int i = 0; i < flags.length; i++) {
  395.             final long l = flags[i];
  396.             hiBits[i] = (int) (l >> 32);
  397.             loBits[i] = (int) l;
  398.         }
  399.         final byte[] hi = encodeBandInt(name, hiBits, hiCodec);
  400.         final byte[] lo = encodeBandInt(name, loBits, loCodec);
  401.         final byte[] total = new byte[hi.length + lo.length];
  402.         System.arraycopy(hi, 0, total, 0, hi.length);
  403.         System.arraycopy(lo, 0, total, hi.length + 1, lo.length);
  404.         return total;
  405.     }

  406. // This could be useful if further enhancements are done but is not currently used
  407. //
  408. //    private void encodeWithRunCodec(String name, int[] band, int index,
  409. //            BHSDCodec defaultCodec, BandData bandData,
  410. //            BandAnalysisResults results) throws Pack200Exception {
  411. //        int[] firstBand = new int[index];
  412. //        int[] secondBand = new int[band.length - index];
  413. //        System.arraycopy(band, 0, firstBand, 0, index);
  414. //        System.arraycopy(band, index, secondBand, 0, secondBand.length);
  415. //        BandAnalysisResults firstResults = analyseBand(name + "A", firstBand, defaultCodec);
  416. //        BandAnalysisResults secondResults = analyseBand(name + "B", secondBand, defaultCodec);
  417. //        int specifier = 117;
  418. //        byte[] specifierEncoded = defaultCodec.encode(new int[] {specifier});
  419. //        int totalLength = firstResults.encodedBand.length + secondResults.encodedBand.length + specifierEncoded.length + 4; // TODO actual
  420. //        if (totalLength < results.encodedBand.length) {
  421. //            System.out.println("using run codec");
  422. //            results.saved += results.encodedBand.length - totalLength;
  423. //            byte[] encodedBand = new byte[specifierEncoded.length + firstResults.encodedBand.length + secondResults.encodedBand.length];
  424. //            System.arraycopy(specifierEncoded, 0, encodedBand, 0, specifierEncoded.length);
  425. //            System.arraycopy(firstResults.encodedBand, 0, encodedBand, specifierEncoded.length, firstResults.encodedBand.length);
  426. //            System.arraycopy(secondResults.encodedBand, 0, encodedBand, specifierEncoded.length + firstResults.encodedBand.length,
  427. //              secondResults.encodedBand.length);
  428. //            results.encodedBand = encodedBand;
  429. //            results.betterCodec = new RunCodec(index, firstResults.betterCodec, secondResults.betterCodec);
  430. //        }
  431. //    }

  432.     protected byte[] encodeFlags(final String name, final long[][] flags, final BHSDCodec loCodec, final BHSDCodec hiCodec, final boolean haveHiFlags)
  433.             throws Pack200Exception {
  434.         return encodeFlags(name, flatten(flags), loCodec, hiCodec, haveHiFlags);
  435.     }

  436.     /**
  437.      * Encode a single value with the given Codec
  438.      *
  439.      * @param value the value to encode
  440.      * @param codec Codec to use
  441.      * @return the encoded value
  442.      * @throws Pack200Exception TODO
  443.      */
  444.     public byte[] encodeScalar(final int value, final BHSDCodec codec) throws Pack200Exception {
  445.         return codec.encode(value);
  446.     }

  447.     /**
  448.      * Encode a band without considering other Codecs
  449.      *
  450.      * @param band  the band
  451.      * @param codec the Codec to use
  452.      * @return the encoded band
  453.      * @throws Pack200Exception TODO
  454.      */
  455.     public byte[] encodeScalar(final int[] band, final BHSDCodec codec) throws Pack200Exception {
  456.         return codec.encode(band);
  457.     }

  458.     private void encodeWithPopulationCodec(final int[] band, final BHSDCodec defaultCodec, final BandData bandData, final BandAnalysisResults results)
  459.             throws Pack200Exception {
  460.         results.numCodecsTried += 3; // quite a bit more effort to try this codec
  461.         final Map<Integer, Integer> distinctValues = bandData.distinctValues;

  462.         final List<Integer> favored = new ArrayList<>();
  463.         distinctValues.forEach((k, v) -> {
  464.             if (v.intValue() > 2 || distinctValues.size() < 256) { // TODO: tweak
  465.                 favored.add(k);
  466.             }
  467.         });

  468.         // Sort the favored list with the most commonly occurring first
  469.         if (distinctValues.size() > 255) {
  470.             favored.sort((arg0, arg1) -> distinctValues.get(arg1).compareTo(distinctValues.get(arg0)));
  471.         }

  472.         final Map<Integer, Integer> favoredToIndex = new HashMap<>();
  473.         for (int i = 0; i < favored.size(); i++) {
  474.             favoredToIndex.put(favored.get(i), Integer.valueOf(i));
  475.         }

  476.         final IntList unfavoured = new IntList();
  477.         final int[] tokens = new int[band.length];
  478.         for (int i = 0; i < band.length; i++) {
  479.             final Integer favouredIndex = favoredToIndex.get(Integer.valueOf(band[i]));
  480.             if (favouredIndex == null) {
  481.                 tokens[i] = 0;
  482.                 unfavoured.add(band[i]);
  483.             } else {
  484.                 tokens[i] = favouredIndex.intValue() + 1;
  485.             }
  486.         }
  487.         favored.add(favored.get(favored.size() - 1)); // repeat last value
  488.         final int[] favouredBand = integerListToArray(favored);
  489.         final int[] unfavouredBand = unfavoured.toArray();

  490.         // Analyse the three bands to get the best codec
  491.         final BandAnalysisResults favouredResults = analyseBand("POPULATION", favouredBand, defaultCodec);
  492.         final BandAnalysisResults unfavouredResults = analyseBand("POPULATION", unfavouredBand, defaultCodec);

  493.         int tdefL = 0;
  494.         int l = 0;
  495.         Codec tokenCodec = null;
  496.         byte[] tokensEncoded;
  497.         final int k = favored.size() - 1;
  498.         if (k < 256) {
  499.             tdefL = 1;
  500.             tokensEncoded = Codec.BYTE1.encode(tokens);
  501.         } else {
  502.             final BandAnalysisResults tokenResults = analyseBand("POPULATION", tokens, defaultCodec);
  503.             tokenCodec = tokenResults.betterCodec;
  504.             tokensEncoded = tokenResults.encodedBand;
  505.             if (tokenCodec == null) {
  506.                 tokenCodec = defaultCodec;
  507.             }
  508.             l = ((BHSDCodec) tokenCodec).getL();
  509.             final int h = ((BHSDCodec) tokenCodec).getH();
  510.             final int s = ((BHSDCodec) tokenCodec).getS();
  511.             final int b = ((BHSDCodec) tokenCodec).getB();
  512.             final int d = ((BHSDCodec) tokenCodec).isDelta() ? 1 : 0;
  513.             if (s == 0 && d == 0) {
  514.                 boolean canUseTDefL = true;
  515.                 if (b > 1) {
  516.                     final BHSDCodec oneLowerB = new BHSDCodec(b - 1, h);
  517.                     if (oneLowerB.largest() >= k) {
  518.                         canUseTDefL = false;
  519.                     }
  520.                 }
  521.                 if (canUseTDefL) {
  522.                     switch (l) {
  523.                     case 4:
  524.                         tdefL = 1;
  525.                         break;
  526.                     case 8:
  527.                         tdefL = 2;
  528.                         break;
  529.                     case 16:
  530.                         tdefL = 3;
  531.                         break;
  532.                     case 32:
  533.                         tdefL = 4;
  534.                         break;
  535.                     case 64:
  536.                         tdefL = 5;
  537.                         break;
  538.                     case 128:
  539.                         tdefL = 6;
  540.                         break;
  541.                     case 192:
  542.                         tdefL = 7;
  543.                         break;
  544.                     case 224:
  545.                         tdefL = 8;
  546.                         break;
  547.                     case 240:
  548.                         tdefL = 9;
  549.                         break;
  550.                     case 248:
  551.                         tdefL = 10;
  552.                         break;
  553.                     case 252:
  554.                         tdefL = 11;
  555.                         break;
  556.                     }
  557.                 }
  558.             }
  559.         }

  560.         final byte[] favouredEncoded = favouredResults.encodedBand;
  561.         final byte[] unfavouredEncoded = unfavouredResults.encodedBand;
  562.         final Codec favouredCodec = favouredResults.betterCodec;
  563.         final Codec unfavouredCodec = unfavouredResults.betterCodec;

  564.         int specifier = 141 + (favouredCodec == null ? 1 : 0) + 4 * tdefL + (unfavouredCodec == null ? 2 : 0);
  565.         final IntList extraBandMetadata = new IntList(3);
  566.         if (favouredCodec != null) {
  567.             IntStream.of(CodecEncoding.getSpecifier(favouredCodec, null)).forEach(extraBandMetadata::add);
  568.         }
  569.         if (tdefL == 0) {
  570.             IntStream.of(CodecEncoding.getSpecifier(tokenCodec, null)).forEach(extraBandMetadata::add);
  571.         }
  572.         if (unfavouredCodec != null) {
  573.             IntStream.of(CodecEncoding.getSpecifier(unfavouredCodec, null)).forEach(extraBandMetadata::add);
  574.         }
  575.         final int[] extraMetadata = extraBandMetadata.toArray();
  576.         final byte[] extraMetadataEncoded = Codec.UNSIGNED5.encode(extraMetadata);
  577.         if (defaultCodec.isSigned()) {
  578.             specifier = -1 - specifier;
  579.         } else {
  580.             specifier += defaultCodec.getL();
  581.         }
  582.         final byte[] firstValueEncoded = defaultCodec.encode(new int[] { specifier });
  583.         final int totalBandLength = firstValueEncoded.length + favouredEncoded.length + tokensEncoded.length + unfavouredEncoded.length;

  584.         if (totalBandLength + extraMetadataEncoded.length < results.encodedBand.length) {
  585.             results.saved += results.encodedBand.length - (totalBandLength + extraMetadataEncoded.length);
  586.             final byte[] encodedBand = new byte[totalBandLength];
  587.             System.arraycopy(firstValueEncoded, 0, encodedBand, 0, firstValueEncoded.length);
  588.             System.arraycopy(favouredEncoded, 0, encodedBand, firstValueEncoded.length, favouredEncoded.length);
  589.             System.arraycopy(tokensEncoded, 0, encodedBand, firstValueEncoded.length + favouredEncoded.length, tokensEncoded.length);
  590.             System.arraycopy(unfavouredEncoded, 0, encodedBand, firstValueEncoded.length + favouredEncoded.length + tokensEncoded.length,
  591.                     unfavouredEncoded.length);
  592.             results.encodedBand = encodedBand;
  593.             results.extraMetadata = extraMetadata;
  594.             if (l != 0) {
  595.                 results.betterCodec = new PopulationCodec(favouredCodec, l, unfavouredCodec);
  596.             } else {
  597.                 results.betterCodec = new PopulationCodec(favouredCodec, tokenCodec, unfavouredCodec);
  598.             }
  599.         }
  600.     }

  601.     /*
  602.      * Flatten a 2-dimension array into a 1-dimension array
  603.      */
  604.     private long[] flatten(final long[][] flags) {
  605.         int totalSize = 0;
  606.         for (final long[] flag : flags) {
  607.             totalSize += flag.length;
  608.         }
  609.         final long[] flatArray = new long[totalSize];
  610.         int index = 0;
  611.         for (final long[] flag : flags) {
  612.             for (final long element : flag) {
  613.                 flatArray[index] = element;
  614.                 index++;
  615.             }
  616.         }
  617.         return flatArray;
  618.     }

  619.     /**
  620.      * Converts a list of Integers to an int[] array.
  621.      *
  622.      * @param integerList conversion source.
  623.      * @return conversion result.
  624.      */
  625.     protected int[] integerListToArray(final List<Integer> integerList) {
  626.         return integerList.stream().mapToInt(Integer::intValue).toArray();
  627.     }

  628.     /**
  629.      * Converts a list of Longs to an long[] array.
  630.      *
  631.      * @param longList conversion source.
  632.      * @return conversion result.
  633.      */
  634.     protected long[] longListToArray(final List<Long> longList) {
  635.         return longList.stream().mapToLong(Long::longValue).toArray();
  636.     }

  637.     /**
  638.      * Write the packed set of bands to the given output stream
  639.      *
  640.      * @param out TODO
  641.      * @throws IOException      If an I/O error occurs.
  642.      * @throws Pack200Exception TODO
  643.      */
  644.     public abstract void pack(OutputStream out) throws IOException, Pack200Exception;

  645.     private boolean timeToStop(final BandAnalysisResults results) {
  646.         // if tried more than effort number of codecs for this band then return
  647.         // Note: these values have been tuned - please test carefully if changing them
  648.         if (effort > 6) {
  649.             return results.numCodecsTried >= effort * 2;
  650.         }
  651.         return results.numCodecsTried >= effort;
  652.         // May want to also check how much we've saved if performance needs improving, e.g. saved more than effort*2 %
  653.         // || (float) results.saved/(float) results.encodedBand.length > (float) effort * 2/100;
  654.     }

  655.     private void tryCodecs(final int[] band, final BHSDCodec defaultCodec, final BandData bandData, final BandAnalysisResults results, final byte[] encoded,
  656.             final BHSDCodec[] potentialCodecs) throws Pack200Exception {
  657.         for (final BHSDCodec potential : potentialCodecs) {
  658.             if (potential.equals(defaultCodec)) {
  659.                 return; // don't try codecs with greater cardinality in the same 'family' as the default codec as there
  660.                         // won't be any savings
  661.             }
  662.             if (potential.isDelta()) {
  663.                 if (potential.largest() >= bandData.largestDelta && potential.smallest() <= bandData.smallestDelta && potential.largest() >= bandData.largest
  664.                         && potential.smallest() <= bandData.smallest) {
  665.                     // TODO: can represent some negative deltas with overflow
  666.                     final byte[] encoded2 = potential.encode(band);
  667.                     results.numCodecsTried++;
  668.                     final byte[] specifierEncoded = defaultCodec.encode(CodecEncoding.getSpecifier(potential, null));
  669.                     final int saved = encoded.length - encoded2.length - specifierEncoded.length;
  670.                     if (saved > results.saved) {
  671.                         results.betterCodec = potential;
  672.                         results.encodedBand = encoded2;
  673.                         results.saved = saved;
  674.                     }
  675.                 }
  676.             } else if (potential.largest() >= bandData.largest && potential.smallest() <= bandData.smallest) {
  677.                 final byte[] encoded2 = potential.encode(band);
  678.                 results.numCodecsTried++;
  679.                 final byte[] specifierEncoded = defaultCodec.encode(CodecEncoding.getSpecifier(potential, null));
  680.                 final int saved = encoded.length - encoded2.length - specifierEncoded.length;
  681.                 if (saved > results.saved) {
  682.                     results.betterCodec = potential;
  683.                     results.encodedBand = encoded2;
  684.                     results.saved = saved;
  685.                 }
  686.             }
  687.             if (timeToStop(results)) {
  688.                 return;
  689.             }
  690.         }
  691.     }

  692. }