001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * https://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.harmony.pack200; 020 021import java.io.IOException; 022import java.io.OutputStream; 023import java.util.ArrayList; 024import java.util.Arrays; 025import java.util.HashMap; 026import java.util.List; 027import java.util.Map; 028import java.util.stream.IntStream; 029 030/** 031 * Abstract superclass for a set of bands 032 */ 033public abstract class BandSet { 034 035 /** 036 * Results obtained by trying different Codecs to encode a band 037 */ 038 public class BandAnalysisResults { 039 040 // The number of Codecs tried so far 041 private int numCodecsTried; 042 043 // The number of bytes saved by using betterCodec instead of the default codec 044 private int saved; 045 046 // Extra metadata to pass to the segment header (to be appended to the 047 // band_headers band) 048 private int[] extraMetadata; 049 050 // The results of encoding the band with betterCodec 051 private byte[] encodedBand; 052 053 // The best Codec found so far, or should be null if the default is the 054 // best so far 055 private Codec betterCodec; 056 057 /** 058 * Constructs a new instance. 059 */ 060 public BandAnalysisResults() { 061 // empty 062 } 063 064 } 065 066 /** 067 * BandData represents information about a band, for example largest value etc and is used in the heuristics that calculate whether an alternative Codec 068 * could make the encoded band smaller. 069 */ 070 public class BandData { 071 072 private final int[] band; 073 private int smallest = Integer.MAX_VALUE; 074 private int largest = Integer.MIN_VALUE; 075 private int smallestDelta; 076 private int largestDelta; 077 078 private int deltaIsAscending; 079 private int smallDeltaCount; 080 081 private double averageAbsoluteDelta; 082 private double averageAbsoluteValue; 083 084 private Map<Integer, Integer> distinctValues; 085 086 /** 087 * Constructs a new instance of BandData. The band is then analyzed. 088 * 089 * @param band the band of integers 090 */ 091 public BandData(final int[] band) { 092 this.band = band; 093 final Integer one = Integer.valueOf(1); 094 for (int i = 0; i < band.length; i++) { 095 if (band[i] < smallest) { 096 smallest = band[i]; 097 } 098 if (band[i] > largest) { 099 largest = band[i]; 100 } 101 if (i != 0) { 102 final int delta = band[i] - band[i - 1]; 103 if (delta < smallestDelta) { 104 smallestDelta = delta; 105 } 106 if (delta > largestDelta) { 107 largestDelta = delta; 108 } 109 if (delta >= 0) { 110 deltaIsAscending++; 111 } 112 averageAbsoluteDelta += (double) Math.abs(delta) / (double) (band.length - 1); 113 if (Math.abs(delta) < 256) { 114 smallDeltaCount++; 115 } 116 } else { 117 smallestDelta = band[0]; 118 largestDelta = band[0]; 119 } 120 averageAbsoluteValue += (double) Math.abs(band[i]) / (double) band.length; 121 if (effort > 3) { // do calculations needed to consider population codec 122 if (distinctValues == null) { 123 distinctValues = new HashMap<>(); 124 } 125 final Integer value = Integer.valueOf(band[i]); 126 Integer count = distinctValues.get(value); 127 if (count == null) { 128 count = one; 129 } else { 130 count = Integer.valueOf(count.intValue() + 1); 131 } 132 distinctValues.put(value, count); 133 } 134 } 135 } 136 137 /** 138 * Returns true if any band elements are negative. 139 * 140 * @return true if any band elements are negative. 141 */ 142 public boolean anyNegatives() { 143 return smallest < 0; 144 } 145 146 /** 147 * Returns true if the band deltas are mainly positive (heuristic). 148 * 149 * @return true if the band deltas are mainly positive (heuristic). 150 */ 151 public boolean mainlyPositiveDeltas() { 152 // Note: the value below has been tuned - please test carefully if changing it 153 return (float) deltaIsAscending / (float) band.length > 0.95F; 154 } 155 156 /** 157 * Returns true if the deltas between adjacent band elements are mainly small (heuristic). 158 * 159 * @return true if the deltas between adjacent band elements are mainly small (heuristic). 160 */ 161 public boolean mainlySmallDeltas() { 162 // Note: the value below has been tuned - please test carefully if changing it 163 return (float) smallDeltaCount / (float) band.length > 0.7F; 164 } 165 166 /** 167 * Returns the total number of distinct values found in the band. 168 * 169 * @return the total number of distinct values found in the band. 170 */ 171 public int numDistinctValues() { 172 if (distinctValues == null) { 173 return band.length; 174 } 175 return distinctValues.size(); 176 } 177 178 /** 179 * Returns true if the band is well correlated (i.e. would be suitable for a delta encoding) (heuristic). 180 * 181 * @return true if the band is well correlated (i.e. would be suitable for a delta encoding) (heuristic). 182 */ 183 public boolean wellCorrelated() { 184 // Note: the value below has been tuned - please test carefully if changing it 185 return averageAbsoluteDelta * 3.1 < averageAbsoluteValue; 186 } 187 188 } 189 190 private static final byte[] EMPTY_BYTE_ARRAY = {}; 191 192 // Minimum size of band for each effort level where we consider alternative codecs 193 // Note: these values have been tuned - please test carefully if changing them 194 private static final int[] effortThresholds = { 0, 0, 1000, 500, 100, 100, 100, 100, 100, 0 }; 195 196 protected final SegmentHeader segmentHeader; 197 final int effort; 198 199 private long[] canonicalLargest; 200 201 private long[] canonicalSmallest; 202 203 /** 204 * Constructs a new BandSet. 205 * 206 * @param effort the packing effort to be used (must be 1-9) 207 * @param header the segment header 208 */ 209 public BandSet(final int effort, final SegmentHeader header) { 210 this.effort = effort; 211 this.segmentHeader = header; 212 } 213 214 private BandAnalysisResults analyseBand(final String name, final int[] band, final BHSDCodec defaultCodec) throws Pack200Exception { 215 216 final BandAnalysisResults results = new BandAnalysisResults(); 217 218 if (canonicalLargest == null) { 219 canonicalLargest = new long[116]; 220 canonicalSmallest = new long[116]; 221 for (int i = 1; i < canonicalLargest.length; i++) { 222 canonicalLargest[i] = CodecEncoding.getCanonicalCodec(i).largest(); 223 canonicalSmallest[i] = CodecEncoding.getCanonicalCodec(i).smallest(); 224 } 225 } 226 final BandData bandData = new BandData(band); 227 228 // Check that there is a reasonable saving to be made 229 final byte[] encoded = defaultCodec.encode(band); 230 results.encodedBand = encoded; 231 232 // Note: these values have been tuned - please test carefully if changing them 233 if (encoded.length <= band.length + 23 - 2 * effort) { // TODO: tweak 234 return results; 235 } 236 237 // Check if we can use BYTE1 as that's a 1:1 mapping if we can 238 if (!bandData.anyNegatives() && bandData.largest <= Codec.BYTE1.largest()) { 239 results.encodedBand = Codec.BYTE1.encode(band); 240 results.betterCodec = Codec.BYTE1; 241 return results; 242 } 243 244 // Consider a population codec (but can't be nested) 245 if (effort > 3 && !name.equals("POPULATION")) { 246 final int numDistinctValues = bandData.numDistinctValues(); 247 final float distinctValuesAsProportion = (float) numDistinctValues / (float) band.length; 248 249 // Note: these values have been tuned - please test carefully if changing them 250 if (numDistinctValues < 100 || distinctValuesAsProportion < 0.02 || effort > 6 && distinctValuesAsProportion < 0.04) { // TODO: tweak 251 encodeWithPopulationCodec(band, defaultCodec, bandData, results); 252 if (timeToStop(results)) { 253 return results; 254 } 255 } 256 } 257 258 final List<BHSDCodec[]> codecFamiliesToTry = new ArrayList<>(); 259 260 // See if the deltas are mainly small increments 261 if (bandData.mainlyPositiveDeltas() && bandData.mainlySmallDeltas()) { 262 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs2); 263 } 264 265 if (bandData.wellCorrelated()) { // Try delta encodings 266 if (bandData.mainlyPositiveDeltas()) { 267 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs1); 268 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs3); 269 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs4); 270 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs5); 271 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs1); 272 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs3); 273 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs4); 274 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs5); 275 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs2); 276 } else { 277 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs1); 278 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs3); 279 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs2); 280 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs4); 281 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs5); 282 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs1); 283 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs2); 284 } 285 } else if (bandData.anyNegatives()) { 286 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs1); 287 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs2); 288 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs1); 289 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs2); 290 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs3); 291 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs4); 292 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs5); 293 } else { 294 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs1); 295 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs3); 296 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs4); 297 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs5); 298 codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs2); 299 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs1); 300 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs3); 301 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs4); 302 codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs5); 303 } 304 for (final BHSDCodec[] family : codecFamiliesToTry) { 305 tryCodecs(band, defaultCodec, bandData, results, encoded, family); 306 if (timeToStop(results)) { 307 break; 308 } 309 } 310 311 return results; 312 } 313 314 /** 315 * Converts a list of ConstantPoolEntrys to an int[] array of their indices 316 * 317 * @param list conversion source. 318 * @return conversion result. 319 */ 320 protected int[] cpEntryListToArray(final List<? extends ConstantPoolEntry> list) { 321 final int[] array = new int[list.size()]; 322 for (int i = 0; i < array.length; i++) { 323 array[i] = list.get(i).getIndex(); 324 if (array[i] < 0) { 325 throw new IllegalArgumentException("Index should be > 0"); 326 } 327 } 328 return array; 329 } 330 331 /** 332 * Converts a list of ConstantPoolEntrys or nulls to an int[] array of their indices +1 (or 0 for nulls) 333 * 334 * @param list conversion source. 335 * @return conversion result. 336 */ 337 protected int[] cpEntryOrNullListToArray(final List<? extends ConstantPoolEntry> list) { 338 final int[] array = new int[list.size()]; 339 for (int j = 0; j < array.length; j++) { 340 final ConstantPoolEntry cpEntry = list.get(j); 341 array[j] = cpEntry == null ? 0 : cpEntry.getIndex() + 1; 342 if (cpEntry != null && cpEntry.getIndex() < 0) { 343 throw new IllegalArgumentException("Index should be > 0"); 344 } 345 } 346 return array; 347 } 348 349 /** 350 * Encode a band of integers. The default codec may be used, but other Codecs are considered if effort is greater than 1. 351 * 352 * @param name name of the band (used for debugging) 353 * @param ints the band 354 * @param defaultCodec the default Codec 355 * @return the encoded band 356 * @throws Pack200Exception If a Pack200 semantic error occurs. 357 */ 358 public byte[] encodeBandInt(final String name, final int[] ints, final BHSDCodec defaultCodec) throws Pack200Exception { 359 byte[] encodedBand = null; 360 // Useful for debugging 361// if (ints.length > 0) { 362// System.out.println("encoding " + name + " " + ints.length); 363// } 364 if (effort > 1 && ints.length >= effortThresholds[effort]) { 365 final BandAnalysisResults results = analyseBand(name, ints, defaultCodec); 366 final Codec betterCodec = results.betterCodec; 367 encodedBand = results.encodedBand; 368 if (betterCodec != null) { 369 if (betterCodec instanceof BHSDCodec) { 370 final int[] specifierBand = CodecEncoding.getSpecifier(betterCodec, defaultCodec); 371 int specifier = specifierBand[0]; 372 if (specifierBand.length > 1) { 373 for (int i = 1; i < specifierBand.length; i++) { 374 segmentHeader.appendBandCodingSpecifier(specifierBand[i]); 375 } 376 } 377 if (defaultCodec.isSigned()) { 378 specifier = -1 - specifier; 379 } else { 380 specifier += defaultCodec.getL(); 381 } 382 final byte[] specifierEncoded = defaultCodec.encode(new int[] { specifier }); 383 final byte[] band = new byte[specifierEncoded.length + encodedBand.length]; 384 System.arraycopy(specifierEncoded, 0, band, 0, specifierEncoded.length); 385 System.arraycopy(encodedBand, 0, band, specifierEncoded.length, encodedBand.length); 386 return band; 387 } 388 if (betterCodec instanceof PopulationCodec) { 389 IntStream.of(results.extraMetadata).forEach(segmentHeader::appendBandCodingSpecifier); 390 return encodedBand; 391 } 392 if (betterCodec instanceof RunCodec) { 393 // TODO? 394 } 395 } 396 } 397 398 // If we get here then we've decided to use the default codec. 399 if (ints.length > 0) { 400 if (encodedBand == null) { 401 encodedBand = defaultCodec.encode(ints); 402 } 403 final int first = ints[0]; 404 if (defaultCodec.getB() != 1) { 405 if (defaultCodec.isSigned() && first >= -256 && first <= -1) { 406 final int specifier = -1 - CodecEncoding.getSpecifierForDefaultCodec(defaultCodec); 407 final byte[] specifierEncoded = defaultCodec.encode(new int[] { specifier }); 408 final byte[] band = new byte[specifierEncoded.length + encodedBand.length]; 409 System.arraycopy(specifierEncoded, 0, band, 0, specifierEncoded.length); 410 System.arraycopy(encodedBand, 0, band, specifierEncoded.length, encodedBand.length); 411 return band; 412 } 413 if (!defaultCodec.isSigned() && first >= defaultCodec.getL() && first <= defaultCodec.getL() + 255) { 414 final int specifier = CodecEncoding.getSpecifierForDefaultCodec(defaultCodec) + defaultCodec.getL(); 415 final byte[] specifierEncoded = defaultCodec.encode(new int[] { specifier }); 416 final byte[] band = new byte[specifierEncoded.length + encodedBand.length]; 417 System.arraycopy(specifierEncoded, 0, band, 0, specifierEncoded.length); 418 System.arraycopy(encodedBand, 0, band, specifierEncoded.length, encodedBand.length); 419 return band; 420 } 421 } 422 return encodedBand; 423 } 424 return EMPTY_BYTE_ARRAY; 425 } 426 427 /** 428 * Encode a band of longs (values are split into their high and low 32 bits and then encoded as two separate bands 429 * 430 * @param name name of the band (for debugging purposes) 431 * @param flags the band 432 * @param loCodec Codec for the low 32-bits band 433 * @param hiCodec Codec for the high 32-bits band 434 * @param haveHiFlags ignores the high band if true as all values would be zero 435 * @return the encoded band 436 * @throws Pack200Exception If a Pack200 semantic error occurs. 437 */ 438 protected byte[] encodeFlags(final String name, final long[] flags, final BHSDCodec loCodec, final BHSDCodec hiCodec, final boolean haveHiFlags) 439 throws Pack200Exception { 440 if (!haveHiFlags) { 441 final int[] loBits = new int[flags.length]; 442 Arrays.setAll(loBits, i -> (int) flags[i]); 443 return encodeBandInt(name, loBits, loCodec); 444 } 445 final int[] hiBits = new int[flags.length]; 446 final int[] loBits = new int[flags.length]; 447 for (int i = 0; i < flags.length; i++) { 448 final long l = flags[i]; 449 hiBits[i] = (int) (l >> 32); 450 loBits[i] = (int) l; 451 } 452 final byte[] hi = encodeBandInt(name, hiBits, hiCodec); 453 final byte[] lo = encodeBandInt(name, loBits, loCodec); 454 final byte[] total = new byte[hi.length + lo.length]; 455 System.arraycopy(hi, 0, total, 0, hi.length); 456 System.arraycopy(lo, 0, total, hi.length + 1, lo.length); 457 return total; 458 } 459 460// This could be useful if further enhancements are done but is not currently used 461// 462// private void encodeWithRunCodec(String name, int[] band, int index, 463// BHSDCodec defaultCodec, BandData bandData, 464// BandAnalysisResults results) throws Pack200Exception { 465// int[] firstBand = new int[index]; 466// int[] secondBand = new int[band.length - index]; 467// System.arraycopy(band, 0, firstBand, 0, index); 468// System.arraycopy(band, index, secondBand, 0, secondBand.length); 469// BandAnalysisResults firstResults = analyseBand(name + "A", firstBand, defaultCodec); 470// BandAnalysisResults secondResults = analyseBand(name + "B", secondBand, defaultCodec); 471// int specifier = 117; 472// byte[] specifierEncoded = defaultCodec.encode(new int[] {specifier}); 473// int totalLength = firstResults.encodedBand.length + secondResults.encodedBand.length + specifierEncoded.length + 4; // TODO actual 474// if (totalLength < results.encodedBand.length) { 475// System.out.println("using run codec"); 476// results.saved += results.encodedBand.length - totalLength; 477// byte[] encodedBand = new byte[specifierEncoded.length + firstResults.encodedBand.length + secondResults.encodedBand.length]; 478// System.arraycopy(specifierEncoded, 0, encodedBand, 0, specifierEncoded.length); 479// System.arraycopy(firstResults.encodedBand, 0, encodedBand, specifierEncoded.length, firstResults.encodedBand.length); 480// System.arraycopy(secondResults.encodedBand, 0, encodedBand, specifierEncoded.length + firstResults.encodedBand.length, 481// secondResults.encodedBand.length); 482// results.encodedBand = encodedBand; 483// results.betterCodec = new RunCodec(index, firstResults.betterCodec, secondResults.betterCodec); 484// } 485// } 486 487 protected byte[] encodeFlags(final String name, final long[][] flags, final BHSDCodec loCodec, final BHSDCodec hiCodec, final boolean haveHiFlags) 488 throws Pack200Exception { 489 return encodeFlags(name, flatten(flags), loCodec, hiCodec, haveHiFlags); 490 } 491 492 /** 493 * Encodes a single value with the given Codec. 494 * 495 * @param value the value to encode. 496 * @param codec Codec to use. 497 * @return the encoded value. 498 * @throws Pack200Exception If a Pack200 semantic error occurs. 499 */ 500 public byte[] encodeScalar(final int value, final BHSDCodec codec) throws Pack200Exception { 501 return codec.encode(value); 502 } 503 504 /** 505 * Encode a band without considering other Codecs. 506 * 507 * @param band the band. 508 * @param codec the Codec to use. 509 * @return the encoded band. 510 * @throws Pack200Exception If a Pack200 semantic error occurs. 511 */ 512 public byte[] encodeScalar(final int[] band, final BHSDCodec codec) throws Pack200Exception { 513 return codec.encode(band); 514 } 515 516 private void encodeWithPopulationCodec(final int[] band, final BHSDCodec defaultCodec, final BandData bandData, final BandAnalysisResults results) 517 throws Pack200Exception { 518 results.numCodecsTried += 3; // quite a bit more effort to try this codec 519 final Map<Integer, Integer> distinctValues = bandData.distinctValues; 520 521 final List<Integer> favored = new ArrayList<>(); 522 distinctValues.forEach((k, v) -> { 523 if (v.intValue() > 2 || distinctValues.size() < 256) { // TODO: tweak 524 favored.add(k); 525 } 526 }); 527 528 // Sort the favored list with the most commonly occurring first 529 if (distinctValues.size() > 255) { 530 favored.sort((arg0, arg1) -> distinctValues.get(arg1).compareTo(distinctValues.get(arg0))); 531 } 532 533 final Map<Integer, Integer> favoredToIndex = new HashMap<>(); 534 for (int i = 0; i < favored.size(); i++) { 535 favoredToIndex.put(favored.get(i), Integer.valueOf(i)); 536 } 537 538 final IntList unfavoured = new IntList(); 539 final int[] tokens = new int[band.length]; 540 for (int i = 0; i < band.length; i++) { 541 final Integer favouredIndex = favoredToIndex.get(Integer.valueOf(band[i])); 542 if (favouredIndex == null) { 543 tokens[i] = 0; 544 unfavoured.add(band[i]); 545 } else { 546 tokens[i] = favouredIndex.intValue() + 1; 547 } 548 } 549 favored.add(favored.get(favored.size() - 1)); // repeat last value 550 final int[] favouredBand = integerListToArray(favored); 551 final int[] unfavouredBand = unfavoured.toArray(); 552 553 // Analyse the three bands to get the best codec 554 final BandAnalysisResults favouredResults = analyseBand("POPULATION", favouredBand, defaultCodec); 555 final BandAnalysisResults unfavouredResults = analyseBand("POPULATION", unfavouredBand, defaultCodec); 556 557 int tdefL = 0; 558 int l = 0; 559 Codec tokenCodec = null; 560 final byte[] tokensEncoded; 561 final int k = favored.size() - 1; 562 if (k < 256) { 563 tdefL = 1; 564 tokensEncoded = Codec.BYTE1.encode(tokens); 565 } else { 566 final BandAnalysisResults tokenResults = analyseBand("POPULATION", tokens, defaultCodec); 567 tokenCodec = tokenResults.betterCodec; 568 tokensEncoded = tokenResults.encodedBand; 569 if (tokenCodec == null) { 570 tokenCodec = defaultCodec; 571 } 572 l = ((BHSDCodec) tokenCodec).getL(); 573 final int h = ((BHSDCodec) tokenCodec).getH(); 574 final int s = ((BHSDCodec) tokenCodec).getS(); 575 final int b = ((BHSDCodec) tokenCodec).getB(); 576 final int d = ((BHSDCodec) tokenCodec).isDelta() ? 1 : 0; 577 if (s == 0 && d == 0) { 578 boolean canUseTDefL = true; 579 if (b > 1) { 580 final BHSDCodec oneLowerB = new BHSDCodec(b - 1, h); 581 if (oneLowerB.largest() >= k) { 582 canUseTDefL = false; 583 } 584 } 585 if (canUseTDefL) { 586 switch (l) { 587 case 4: 588 tdefL = 1; 589 break; 590 case 8: 591 tdefL = 2; 592 break; 593 case 16: 594 tdefL = 3; 595 break; 596 case 32: 597 tdefL = 4; 598 break; 599 case 64: 600 tdefL = 5; 601 break; 602 case 128: 603 tdefL = 6; 604 break; 605 case 192: 606 tdefL = 7; 607 break; 608 case 224: 609 tdefL = 8; 610 break; 611 case 240: 612 tdefL = 9; 613 break; 614 case 248: 615 tdefL = 10; 616 break; 617 case 252: 618 tdefL = 11; 619 break; 620 } 621 } 622 } 623 } 624 625 final byte[] favouredEncoded = favouredResults.encodedBand; 626 final byte[] unfavouredEncoded = unfavouredResults.encodedBand; 627 final Codec favouredCodec = favouredResults.betterCodec; 628 final Codec unfavouredCodec = unfavouredResults.betterCodec; 629 630 int specifier = 141 + (favouredCodec == null ? 1 : 0) + 4 * tdefL + (unfavouredCodec == null ? 2 : 0); 631 final IntList extraBandMetadata = new IntList(3); 632 if (favouredCodec != null) { 633 IntStream.of(CodecEncoding.getSpecifier(favouredCodec, null)).forEach(extraBandMetadata::add); 634 } 635 if (tdefL == 0) { 636 IntStream.of(CodecEncoding.getSpecifier(tokenCodec, null)).forEach(extraBandMetadata::add); 637 } 638 if (unfavouredCodec != null) { 639 IntStream.of(CodecEncoding.getSpecifier(unfavouredCodec, null)).forEach(extraBandMetadata::add); 640 } 641 final int[] extraMetadata = extraBandMetadata.toArray(); 642 final byte[] extraMetadataEncoded = Codec.UNSIGNED5.encode(extraMetadata); 643 if (defaultCodec.isSigned()) { 644 specifier = -1 - specifier; 645 } else { 646 specifier += defaultCodec.getL(); 647 } 648 final byte[] firstValueEncoded = defaultCodec.encode(new int[] { specifier }); 649 final int totalBandLength = firstValueEncoded.length + favouredEncoded.length + tokensEncoded.length + unfavouredEncoded.length; 650 651 if (totalBandLength + extraMetadataEncoded.length < results.encodedBand.length) { 652 results.saved += results.encodedBand.length - (totalBandLength + extraMetadataEncoded.length); 653 final byte[] encodedBand = new byte[totalBandLength]; 654 System.arraycopy(firstValueEncoded, 0, encodedBand, 0, firstValueEncoded.length); 655 System.arraycopy(favouredEncoded, 0, encodedBand, firstValueEncoded.length, favouredEncoded.length); 656 System.arraycopy(tokensEncoded, 0, encodedBand, firstValueEncoded.length + favouredEncoded.length, tokensEncoded.length); 657 System.arraycopy(unfavouredEncoded, 0, encodedBand, firstValueEncoded.length + favouredEncoded.length + tokensEncoded.length, 658 unfavouredEncoded.length); 659 results.encodedBand = encodedBand; 660 results.extraMetadata = extraMetadata; 661 if (l != 0) { 662 results.betterCodec = new PopulationCodec(favouredCodec, l, unfavouredCodec); 663 } else { 664 results.betterCodec = new PopulationCodec(favouredCodec, tokenCodec, unfavouredCodec); 665 } 666 } 667 } 668 669 /* 670 * Flatten a 2-dimension array into a 1-dimension array 671 */ 672 private long[] flatten(final long[][] flags) { 673 int totalSize = 0; 674 for (final long[] flag : flags) { 675 totalSize += flag.length; 676 } 677 final long[] flatArray = new long[totalSize]; 678 int index = 0; 679 for (final long[] flag : flags) { 680 for (final long element : flag) { 681 flatArray[index] = element; 682 index++; 683 } 684 } 685 return flatArray; 686 } 687 688 /** 689 * Converts a list of Integers to an int[] array. 690 * 691 * @param integerList conversion source. 692 * @return conversion result. 693 */ 694 protected int[] integerListToArray(final List<Integer> integerList) { 695 return integerList.stream().mapToInt(Integer::intValue).toArray(); 696 } 697 698 /** 699 * Converts a list of Longs to an long[] array. 700 * 701 * @param longList conversion source. 702 * @return conversion result. 703 */ 704 protected long[] longListToArray(final List<Long> longList) { 705 return longList.stream().mapToLong(Long::longValue).toArray(); 706 } 707 708 /** 709 * Writes the packed set of bands to the given output stream. 710 * 711 * @param out TODO 712 * @throws IOException If an I/O error occurs. 713 * @throws Pack200Exception If a Pack200 semantic error occurs. 714 */ 715 public abstract void pack(OutputStream out) throws IOException, Pack200Exception; 716 717 private boolean timeToStop(final BandAnalysisResults results) { 718 // if tried more than effort number of codecs for this band then return 719 // Note: these values have been tuned - please test carefully if changing them 720 if (effort > 6) { 721 return results.numCodecsTried >= effort * 2; 722 } 723 return results.numCodecsTried >= effort; 724 // May want to also check how much we've saved if performance needs improving, for example saved more than effort*2 % 725 // || (float) results.saved/(float) results.encodedBand.length > (float) effort * 2/100; 726 } 727 728 private void tryCodecs(final int[] band, final BHSDCodec defaultCodec, final BandData bandData, final BandAnalysisResults results, final byte[] encoded, 729 final BHSDCodec[] potentialCodecs) throws Pack200Exception { 730 for (final BHSDCodec potential : potentialCodecs) { 731 if (potential.equals(defaultCodec)) { 732 return; // don't try codecs with greater cardinality in the same 'family' as the default codec as there 733 // won't be any savings 734 } 735 if (potential.isDelta()) { 736 if (potential.largest() >= bandData.largestDelta && potential.smallest() <= bandData.smallestDelta && potential.largest() >= bandData.largest 737 && potential.smallest() <= bandData.smallest) { 738 // TODO: can represent some negative deltas with overflow 739 final byte[] encoded2 = potential.encode(band); 740 results.numCodecsTried++; 741 final byte[] specifierEncoded = defaultCodec.encode(CodecEncoding.getSpecifier(potential, null)); 742 final int saved = encoded.length - encoded2.length - specifierEncoded.length; 743 if (saved > results.saved) { 744 results.betterCodec = potential; 745 results.encodedBand = encoded2; 746 results.saved = saved; 747 } 748 } 749 } else if (potential.largest() >= bandData.largest && potential.smallest() <= bandData.smallest) { 750 final byte[] encoded2 = potential.encode(band); 751 results.numCodecsTried++; 752 final byte[] specifierEncoded = defaultCodec.encode(CodecEncoding.getSpecifier(potential, null)); 753 final int saved = encoded.length - encoded2.length - specifierEncoded.length; 754 if (saved > results.saved) { 755 results.betterCodec = potential; 756 results.encodedBand = encoded2; 757 results.saved = saved; 758 } 759 } 760 if (timeToStop(results)) { 761 return; 762 } 763 } 764 } 765 766}